1+ """
2+ SmartScraperGraph Module Burr Version
3+ """
from burr.core import ApplicationBuilder, State, default
from burr.core.action import action

from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_core.documents import Document

from ..utils.remover import remover


@action(reads=["url", "local_dir"], writes=["doc"])
def fetch_node(
    state: State,
    headless: bool = True,
    verbose: bool = False,
    input_type: str = "url",
    node_config: dict = None,
) -> tuple[dict, State]:
    """Fetch the source document from the URL or local directory in state.

    input_type and node_config replace the invalid self.input and
    self.node_config references of the original class-based node; they are
    supplied as Burr runtime inputs and default to a plain URL fetch.
    """
    if verbose:
        print("--- Executing Fetch Node ---")

    source = state.get("url", state.get("local_dir"))

    # Structured local files (JSON/XML/CSV) are wrapped as-is.
    if input_type in ("json_dir", "xml_dir", "csv_dir"):
        compressed_document = [Document(page_content=source, metadata={
            "source": "local_dir"
        })]
    # Local HTML: strip boilerplate tags before wrapping.
    elif not source.startswith("http"):
        compressed_document = [Document(page_content=remover(source), metadata={
            "source": "local_dir"
        })]
    else:
        # Remote URL: render it with headless Chromium, optionally through
        # the proxy endpoint configured in node_config.
        if node_config is not None and node_config.get("endpoint") is not None:
            loader = AsyncChromiumLoader(
                [source],
                proxies={"http": node_config["endpoint"]},
                headless=headless,
            )
        else:
            loader = AsyncChromiumLoader(
                [source],
                headless=headless,
            )

        document = loader.load()
        compressed_document = [
            Document(page_content=remover(str(document[0].page_content)))
        ]

    return {"doc": compressed_document}, state.update(doc=compressed_document)

@action(reads=["doc"], writes=["parsed_doc"])
def parse_node(state: State, chunk_size: int) -> tuple[dict, State]:
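    # Placeholder: document chunking is not implemented in this Burr port yet.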
    return {}, state

@action(reads=["user_prompt", "parsed_doc", "doc"],
        writes=["relevant_chunks"])
def rag_node(state: State, llm_model: object, embedder_model: object) -> tuple[dict, State]:
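    # Placeholder: retrieval of relevant chunks is not implemented yet.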
    return {}, state

@action(reads=["user_prompt", "relevant_chunks", "parsed_doc", "doc"],
        writes=["answer"])
def generate_answer_node(state: State, llm_model: object) -> tuple[dict, State]:
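    # Placeholder: LLM answer generation is not implemented yet.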
    return {}, state

def run(prompt: str, input_key: str, source: str, config: dict) -> str:
    """Build the Burr application for the scraping pipeline and visualize it.

    The app.run call below is left disabled until the placeholder nodes are
    implemented; once enabled, it returns the generated answer as a string.
    """
    llm_model = config["llm_model"]
    embedder_model = config["embedder_model"]
    chunk_size = config["model_token"]

    initial_state = {
        "user_prompt": prompt,
        input_key: source
    }
    app = (
        ApplicationBuilder()
        .with_actions(
            fetch_node=fetch_node,
            parse_node=parse_node,
            rag_node=rag_node,
            generate_answer_node=generate_answer_node
        )
        .with_transitions(
            ("fetch_node", "parse_node", default),
            ("parse_node", "rag_node", default),
            ("rag_node", "generate_answer_node", default)
        )
        .with_entrypoint("fetch_node")
        .with_state(**initial_state)
        .build()
    )
    app.visualize(
        output_file_path="smart_scraper_graph",
        include_conditions=False, view=True, format="png"
    )
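    # Note: Burr binds runtime inputs to action parameters by name, so the
    # chunk_size key below must match parse_node's parameter name (the
    # original code used "model_token", which would not bind).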
    # last_action, result, state = app.run(
    #     halt_after=["generate_answer_node"],
    #     inputs={
    #         "llm_model": llm_model,
    #         "embedder_model": embedder_model,
    #         "chunk_size": chunk_size
    #     }
    # )
    # return result.get("answer", "No answer found.")

if __name__ == '__main__':
    prompt = "What is the capital of France?"
    source = "https://en.wikipedia.org/wiki/Paris"
    input_key = "url"
    config = {
        "llm_model": "rag-token",
        "embedder_model": "foo",
        "model_token": "bar",
    }
    run(prompt, input_key, source, config)