File tree Expand file tree Collapse file tree 5 files changed +1024
-1
lines changed Expand file tree Collapse file tree 5 files changed +1024
-1
lines changed Original file line number Diff line number Diff line change 1+ """
2+ Basic example of a scraping pipeline using SmartScraper
3+ """
4+ import os
5+ import json
6+ from dotenv import load_dotenv
7+ from scrapegraphai .graphs import SmartScraperGraph
8+ from scrapegraphai .utils import prettify_exec_info
9+
# Call load_dotenv() before the API key is looked up in the environment below.
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

# LLM section of the configuration: the model identifier plus the API key,
# which is looked up in the SCRAPEGRAPH_API_KEY environment variable
# (the lookup yields None when the variable is not set).
llm_settings = {
    "model": "scrapegraphai/smart-scraper",
    "api_key": os.environ.get("SCRAPEGRAPH_API_KEY"),
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SmartScraperGraph, execute it and show the result
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="Extract me all the articles",
    source="https://www.wired.com",
    config=graph_config,
)

result = smart_scraper_graph.run()

# Serialize the result as indented JSON so the printed output is readable.
formatted_result = json.dumps(result, indent=4)
print(formatted_result)

# ************************************************
# Report execution info for the run above
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
Original file line number Diff line number Diff line change @@ -43,7 +43,8 @@ dependencies = [
4343 " transformers>=4.44.2" ,
4444 " googlesearch-python>=1.2.5" ,
4545 " simpleeval>=1.0.0" ,
46- " async_timeout>=4.0.3"
46+ " async_timeout>=4.0.3" ,
47+ " scrapegraph-py>=0.0.4"
4748]
4849
4950license = " MIT"
You can’t perform that action at this time.
0 commit comments