from copy import deepcopy
from typing import List, Optional

from pydantic import BaseModel

from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .scrape_graph import ScrapeGraph
from ..nodes import (
    GraphIteratorNode,
    MergeAnswersNode,
)
from ..utils.copy import safe_deepcopy
class SmartScraperMultiGraph(AbstractGraph):
    """
    SmartScraperMultiGraph is a scraping pipeline that scrapes a list of URLs,
    merges the scraped content first, and finally generates an answer to a
    given prompt. It only requires a user prompt and a list of URLs.
    The difference from SmartScraperGraph is that here the content of every
    page is merged before being passed to the LLM.

    Attributes:
        prompt (str): The user prompt to answer from the scraped pages.
        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> smart_scraper_multi_graph = SmartScraperMultiGraph(
        ...     prompt="Who is Marco Perini?",
        ...     source=[
        ...         "https://perinim.github.io/",
        ...         "https://perinim.github.io/cv/"
        ...     ],
        ...     config={"llm": {"model": "openai/gpt-3.5-turbo"}}
        ... )
        >>> result = smart_scraper_multi_graph.run()
    """
4450 def __init__ (self , prompt : str , source : List [str ],
4551 config : dict , schema : Optional [BaseModel ] = None ):
4652
47- self .max_results = config .get ("max_results" , 3 )
4853 self .copy_config = safe_deepcopy (config )
4954 self .copy_schema = deepcopy (schema )
50-
5155 super ().__init__ (prompt , config , source , schema )
5256
5357 def _create_graph (self ) -> BaseGraph :
5458 """
55- Creates the graph of nodes representing the workflow for web scraping and searching.
56-
57- Returns:
58- BaseGraph: A graph instance representing the web scraping and searching workflow.
59+ Creates the graph of nodes representing the workflow for web scraping
60+ and parsing and then merge the content and generates answers to a given prompt.
5961 """
60-
6162 graph_iterator_node = GraphIteratorNode (
6263 input = "user_prompt & urls" ,
63- output = ["results " ],
64+ output = ["parsed_doc " ],
6465 node_config = {
65- "graph_instance" : SmartScraperGraph ,
66+ "graph_instance" : ScrapeGraph ,
6667 "scraper_config" : self .copy_config ,
6768 },
6869 schema = self .copy_schema
6970 )
7071
7172 merge_answers_node = MergeAnswersNode (
72- input = "user_prompt & results " ,
73+ input = "user_prompt & parsed_doc " ,
7374 output = ["answer" ],
7475 node_config = {
7576 "llm_model" : self .llm_model ,
@@ -91,12 +92,12 @@ def _create_graph(self) -> BaseGraph:
9192
9293 def run (self ) -> str :
9394 """
94- Executes the web scraping and searching process.
95+ Executes the web scraping and parsing process first and
96+ then concatenate the content and generates answers to a given prompt.
9597
9698 Returns:
9799 str: The answer to the prompt.
98100 """
99101 inputs = {"user_prompt" : self .prompt , "urls" : self .source }
100102 self .final_state , self .execution_info = self .graph .execute (inputs )
101-
102103 return self .final_state .get ("answer" , "No answer found." )