File tree Expand file tree Collapse file tree 5 files changed +81
-6
lines changed Expand file tree Collapse file tree 5 files changed +81
-6
lines changed Original file line number Diff line number Diff line change 1+ """
2+ Basic example of a scraping pipeline using SmartScraperGraph with the reasoning step enabled
3+ """
4+
5+ import os
6+ import json
7+ from dotenv import load_dotenv
8+ from scrapegraphai .graphs import SmartScraperGraph
9+ from scrapegraphai .utils import prettify_exec_info
10+
11+ load_dotenv ()
12+
13+ # ************************************************
14+ # Define the configuration for the graph
15+ # ************************************************
16+
17+
18+ graph_config = {
19+ "llm" : {
20+ "api_key" : os .getenv ("OPENAI_API_KEY" ),
21+ "model" : "openai/gpt-4o" ,
22+ },
23+ "reasoning" : True ,
24+ "verbose" : True ,
25+ "headless" : False ,
26+ }
27+
28+ # ************************************************
29+ # Create the SmartScraperGraph instance and run it
30+ # ************************************************
31+
32+ smart_scraper_graph = SmartScraperGraph (
33+ prompt = "List me what does the company do, the name and a contact email." ,
34+ source = "https://scrapegraphai.com/" ,
35+ config = graph_config
36+ )
37+
38+ result = smart_scraper_graph .run ()
39+ print (json .dumps (result , indent = 4 ))
40+
41+ # ************************************************
42+ # Get graph execution info
43+ # ************************************************
44+
45+ graph_exec_info = smart_scraper_graph .get_execution_info ()
46+ print (prettify_exec_info (graph_exec_info ))
Original file line number Diff line number Diff line change 99from ..nodes import (
1010 FetchNode ,
1111 ParseNode ,
12+ ReasoningNode ,
1213 GenerateAnswerNode
1314)
1415
@@ -88,6 +89,33 @@ def _create_graph(self) -> BaseGraph:
8889 }
8990 )
9091
92+ if self .config .get ("reasoning" ):
93+ reasoning_node = ReasoningNode (
94+ input = "user_prompt & (relevant_chunks | parsed_doc | doc)" ,
95+ output = ["answer" ],
96+ node_config = {
97+ "llm_model" : self .llm_model ,
98+ "additional_info" : self .config .get ("additional_info" ),
99+ "schema" : self .schema ,
100+ }
101+ )
102+
103+ return BaseGraph (
104+ nodes = [
105+ fetch_node ,
106+ parse_node ,
107+ reasoning_node ,
108+ generate_answer_node ,
109+ ],
110+ edges = [
111+ (fetch_node , parse_node ),
112+ (parse_node , reasoning_node ),
113+ (reasoning_node , generate_answer_node )
114+ ],
115+ entry_point = fetch_node ,
116+ graph_name = self .__class__ .__name__
117+ )
118+
91119 return BaseGraph (
92120 nodes = [
93121 fetch_node ,
Original file line number Diff line number Diff line change 2626from .prompt_refiner_node import PromptRefinerNode
2727from .html_analyzer_node import HtmlAnalyzerNode
2828from .generate_code_node import GenerateCodeNode
29- from .reasoning_node import ReasoningNode
29+ from .reasoning_node import ReasoningNode
Original file line number Diff line number Diff line change @@ -50,12 +50,13 @@ def __init__(
5050 )
5151
5252 self .additional_info = node_config .get ("additional_info" , None )
53-
53+
5454 self .output_schema = node_config .get ("schema" )
5555
5656 def execute (self , state : dict ) -> dict :
5757 """
58- Generate a refined prompt for the reasoning task based on the user's input and the JSON schema.
58+ Generate a refined prompt for the reasoning task based
59+ on the user's input and the JSON schema.
5960
6061 Args:
6162 state (dict): The current state of the graph. The input keys will be used
@@ -70,11 +71,11 @@ def execute(self, state: dict) -> dict:
7071 """
7172
7273 self .logger .info (f"--- Executing { self .node_name } Node ---" )
73-
74+
7475 user_prompt = state ['user_prompt' ]
7576
7677 self .simplefied_schema = transform_schema (self .output_schema .schema ())
77-
78+
7879 if self .additional_info is not None :
7980 prompt = PromptTemplate (
8081 template = TEMPLATE_REASONING_WITH_CONTEXT ,
Original file line number Diff line number Diff line change 3131**Reasoning Output**:
3232[Your detailed analysis based on the above instructions]
3333"""
34-
34+
3535TEMPLATE_REASONING_WITH_CONTEXT = """
3636**Task**: Analyze the user's request and the provided JSON schema to guide an LLM in extracting information directly from a markdown file previously parsed from an HTML file.
3737
You can’t perform that action at this time.
0 commit comments