1+ """
2+ Async example demonstrating how to use the Smart Scraper API with interactive steps.
3+ This example shows how to:
4+ 1. Set up interactive steps for website navigation asynchronously
5+ 2. Use the AsyncClient with custom steps
6+ 3. Handle concurrent requests with different step configurations
7+ 4. Display comprehensive results with timing
8+
9+ Interactive steps allow you to:
10+ - Click on elements
11+ - Fill input fields
12+ - Wait for page loads
13+ - Navigate through multiple pages
14+ - Perform complex user interactions
15+
16+ Requirements:
17+ - Python 3.7+
18+ - scrapegraph-py
19+ - A .env file with your SGAI_API_KEY
20+
21+ Example .env file:
22+ SGAI_API_KEY=your_api_key_here
23+ """
24+
25+ import asyncio
26+ import json
27+ import os
28+ import time
29+ from dotenv import load_dotenv
30+ from scrapegraph_py import AsyncClient
31+
32+ # Load environment variables from .env file
33+ load_dotenv ()
34+
35+
async def async_smartscraper_with_steps():
    """
    Run one Smart Scraper request that performs interactive steps first.

    Prints the request configuration, sends the request through ``AsyncClient``,
    then prints timing, the response's ``request_id``/``status`` and, when the
    response carries a ``result`` key, the pretty-printed data plus size
    statistics. Exceptions raised while the request is made or reported are
    caught and printed together with the time elapsed so far.

    Raises:
        ValueError: If ``SGAI_API_KEY`` is not set in the environment.
    """
    # Fail fast with an actionable message before any request is attempted.
    api_key = os.getenv("SGAI_API_KEY")
    if not api_key:
        raise ValueError(
            "API key must be provided or set in .env file as SGAI_API_KEY. "
            "Create a .env file with: SGAI_API_KEY=your_api_key_here"
        )

    # Natural-language instructions passed to the API as the `steps` argument.
    steps = [
        "click on search bar",
        "wait for 500ms",
        # Placeholder address; the previous value was an obfuscation artifact.
        "fill email input box with user@example.com",
        "wait a sec",
        "click on the first result of search",
        "wait for 2 seconds to load the result of search",
    ]

    # Target website configuration
    website_url = "https://github.com/"
    user_prompt = "Extract user profile information"

    print("🚀 Starting Async Smart Scraper with Interactive Steps...")
    print(f"🌐 Website URL: {website_url} ")
    print(f"🎯 User Prompt: {user_prompt} ")
    print(f"📋 Interactive Steps: {len(steps)} steps configured")
    print("\n " + "=" * 60)

    print("🎯 Interactive Steps to Execute:")
    for i, step in enumerate(steps, 1):
        print(f" {i} . {step} ")
    print("\n " + "=" * 60)

    # Wall-clock time is used only for the human-readable timestamps; the
    # elapsed duration comes from a monotonic clock so that system clock
    # adjustments cannot distort (or negate) the measurement.
    started_at = time.time()
    timer_start = time.perf_counter()
    print(f"⏱️ Timer started at: {time.strftime('%H:%M:%S', time.localtime(started_at))} ")
    print("🔄 Processing async request with interactive steps...")

    try:
        # NOTE(review): from_env() presumably reads SGAI_API_KEY itself — the
        # key validated above is not passed explicitly; confirm against the SDK.
        async with AsyncClient.from_env() as client:
            response = await client.smartscraper(
                user_prompt=user_prompt,
                website_url=website_url,
                steps=steps,
            )
            execution_time = time.perf_counter() - timer_start

        execution_minutes = execution_time / 60

        print(f"⏱️ Timer stopped at: {time.strftime('%H:%M:%S', time.localtime())} ")
        print(f"⚡ Total execution time: {execution_time:.2f} seconds ({execution_minutes:.2f} minutes)")
        print(f"📊 Performance: {execution_time:.1f} s ({execution_minutes:.1f} m) for {len(steps)} interactive steps")

        print("✅ Request completed successfully!")
        print(f"📊 Request ID: {response.get('request_id', 'N/A')} ")
        print(f"🔄 Status: {response.get('status', 'N/A')} ")

        if response.get("error"):
            print(f"❌ Error: {response['error']} ")
        else:
            print("\n 📋 EXTRACTED DATA:")
            print("=" * 60)

            if "result" in response:
                result_data = response["result"]
                print(json.dumps(result_data, indent=2, ensure_ascii=False))

                print("\n 📊 EXTRACTION STATISTICS:")
                print("-" * 50)
                result_str = json.dumps(result_data)
                key_count = len(result_data) if isinstance(result_data, dict) else "N/A"
                # Guard the rate against a zero-length interval.
                chars_per_second = len(result_str) / execution_time if execution_time > 0 else 0.0
                print(f"📝 Data size: {len(result_str)} characters")
                print(f"🔗 JSON keys: {key_count} ")
                print(f"⚡ Processing speed: {chars_per_second:.0f} chars/second")
                print(f"🎯 Steps efficiency: {execution_time / len(steps):.2f} s per step")
            else:
                print("No result data found")

    except Exception as e:
        # Best-effort example: report the failure and how long it took
        # instead of aborting the remaining demonstrations.
        execution_time = time.perf_counter() - timer_start
        execution_minutes = execution_time / 60

        print(f"⏱️ Timer stopped at: {time.strftime('%H:%M:%S', time.localtime())} ")
        print(f"⚡ Execution time before error: {execution_time:.2f} seconds ({execution_minutes:.2f} minutes)")
        print(f"💥 Error occurred: {e} ")
135+
136+
async def async_smartscraper_concurrent_steps():
    """
    Run several Smart Scraper requests concurrently, each with its own steps.

    Builds one ``client.smartscraper(...)`` call per entry in ``configs``,
    awaits them together with ``asyncio.gather(..., return_exceptions=True)``
    and prints, per configuration, either the error or the response's
    ``status``, ``request_id`` and serialized result size. Any exception
    raised outside the individual requests is caught and printed.
    """
    print("\n 🎯 CONCURRENT REQUESTS WITH DIFFERENT STEPS")
    print("=" * 60)

    # One entry per request: display name, target URL, prompt and steps.
    configs = [
        {
            "name": "GitHub Search",
            "url": "https://github.com/",
            "prompt": "Extract repository information",
            "steps": [
                "click on search bar",
                "wait for 300ms",
                "fill search with 'python'",
                "wait for 1 second",
                "click first result",
            ],
        },
        {
            "name": "Profile Navigation",
            "url": "https://github.com/",
            "prompt": "Extract user profile details",
            "steps": [
                "click on profile menu",
                "wait for 1 second",
                "click on settings",
                "wait for 2 seconds",
                "scroll to profile section",
            ],
        },
        {
            "name": "Repository Details",
            "url": "https://github.com/",
            "prompt": "Extract repository details",
            "steps": [
                "click on repositories tab",
                "wait for 500ms",
                "click on first repository",
                "wait for 1 second",
                "scroll to readme section",
            ],
        },
    ]

    print(f"🔄 Executing {len(configs)} concurrent requests...")
    # Monotonic clock: immune to system clock adjustments while waiting.
    timer_start = time.perf_counter()

    try:
        async with AsyncClient.from_env() as client:
            pending = [
                client.smartscraper(
                    user_prompt=config["prompt"],
                    website_url=config["url"],
                    steps=config["steps"],
                )
                for config in configs
            ]

            # return_exceptions=True keeps one failed request from discarding
            # the others; failures come back as values in `results`, in the
            # same order as `pending`.
            results = await asyncio.gather(*pending, return_exceptions=True)
            execution_time = time.perf_counter() - timer_start

        print(f"⚡ Total concurrent execution time: {execution_time:.2f} seconds")
        print(f"📊 Average per request: {execution_time / len(configs):.2f} seconds")

        print("\n 📋 CONCURRENT RESULTS:")
        print("=" * 60)

        for i, (config, result) in enumerate(zip(configs, results), 1):
            print(f"\n {i} . {config['name']} :")
            print(f" 🎯 Prompt: {config['prompt']} ")
            print(f" 📝 Steps: {len(config['steps'])} ")

            # BaseException, not Exception: gather() may hand back
            # asyncio.CancelledError, which does not derive from Exception
            # on Python 3.8+ and would otherwise be treated as a response.
            if isinstance(result, BaseException):
                print(f" ❌ Error: {result} ")
            else:
                print(f" ✅ Status: {result.get('status', 'N/A')} ")
                print(f" 📊 Request ID: {result.get('request_id', 'N/A')} ")
                if "result" in result:
                    data_size = len(json.dumps(result["result"]))
                    print(f" 📝 Data size: {data_size} characters")
            print("-" * 40)

    except Exception as e:
        # Best-effort example: report the failure rather than abort the run.
        print(f"💥 Error in concurrent execution: {e} ")
230+
231+
# Leading verb / keyword of a step mapped to its display category. Insertion
# order is also the priority order of the substring fallback below.
_STEP_TYPE_BY_KEYWORD = {
    "click": "Navigation",
    "wait": "Wait",
    "fill": "Input",
    "scroll": "Action",
}


def _classify_step(step):
    """Return the display category for a step description."""
    # Prefer the leading verb so that e.g. "wait for the click handler" is a
    # Wait rather than being labelled Navigation by the word "click".
    words = step.lower().split()
    if words and words[0] in _STEP_TYPE_BY_KEYWORD:
        return _STEP_TYPE_BY_KEYWORD[words[0]]
    # Otherwise fall back to the first keyword found anywhere in the step.
    for keyword, label in _STEP_TYPE_BY_KEYWORD.items():
        if keyword in step:
            return label
    return "Other"


async def async_smartscraper_step_patterns():
    """
    Print example step sequences for common interaction scenarios.

    No request is made: the function only lists three illustrative patterns
    (authentication, form submission, dynamic content loading) and tags each
    step with a category derived from its wording.
    """
    print("\n 🎯 DIFFERENT STEP PATTERNS DEMONSTRATION")
    print("=" * 60)

    patterns = [
        {
            "name": "Authentication Flow",
            "description": "Steps for logging into a website",
            "steps": [
                "click on login button",
                "wait for 1 second",
                # Placeholder address; the previous value was an obfuscation artifact.
                "fill username field with user@example.com",
                "wait for 200ms",
                "fill password field with password123",
                "wait for 300ms",
                "click submit button",
                "wait for 3 seconds",
            ],
        },
        {
            "name": "Form Submission",
            "description": "Steps for filling and submitting a form",
            "steps": [
                "scroll to contact form",
                "wait for 500ms",
                "fill name field with John Doe",
                "wait for 200ms",
                # Placeholder address; the previous value was an obfuscation artifact.
                "fill email field with user@example.com",
                "wait for 200ms",
                "fill message field with Hello World",
                "wait for 300ms",
                "click submit button",
            ],
        },
        {
            "name": "Dynamic Content Loading",
            "description": "Steps for loading more content dynamically",
            "steps": [
                "scroll to bottom of page",
                "wait for 1 second",
                "click load more button",
                "wait for 2 seconds",
                "scroll down again",
                "wait for 1 second",
                "click show details button",
            ],
        },
    ]

    for i, pattern in enumerate(patterns, 1):
        print(f"\n 📋 Pattern {i} : {pattern['name']} ")
        print(f"📝 Description: {pattern['description']} ")
        print(f"🎯 Steps ({len(pattern['steps'])} ):")
        for j, step in enumerate(pattern["steps"], 1):
            step_type = _classify_step(step)
            print(f" {j} . {step} [{step_type} ]")
        print("-" * 40)
292+
293+
async def main():
    """
    Print the introductory banner, then run the three demonstrations in order.

    Any exception escaping the banner or a demonstration is reported together
    with a short troubleshooting checklist instead of propagating.
    """
    intro_lines = (
        "🎯 ASYNC SMART SCRAPER INTERACTIVE STEPS EXAMPLE",
        "=" * 60,
        "This example demonstrates how to use interactive steps with Async Smart Scraper.",
        "Interactive steps allow you to navigate websites like a human user asynchronously.",
        "This enables faster processing through concurrent requests.",
        "",  # blank separator line before the demos start
    )
    troubleshooting_lines = (
        "\n 🛠️ Troubleshooting:",
        "1. Make sure your .env file contains SGAI_API_KEY",
        "2. Check your internet connection",
        "3. Verify the target website is accessible",
        "4. Ensure you have sufficient credits in your account",
    )

    try:
        for line in intro_lines:
            print(line)

        # Run sequentially: each demo finishes before the next one starts.
        for demo in (
            async_smartscraper_with_steps,
            async_smartscraper_concurrent_steps,
            async_smartscraper_step_patterns,
        ):
            await demo()

    except Exception as exc:
        print(f"💥 Error occurred: {exc!s} ")
        for hint in troubleshooting_lines:
            print(hint)
317+
318+
if __name__ == "__main__":
    # Script entry point: drive the main() coroutine to completion.
    asyncio.run(main())
0 commit comments