Skip to content

Commit 6dbac93

Browse files
committed
rename the SmartScraperMultiParseMergeFirstGraph to SmartScraperMultiGraph
1 parent 78bd40c commit 6dbac93

File tree

1 file changed

+20
-19
lines changed

1 file changed

+20
-19
lines changed

scrapegraphai/graphs/smart_scraper_multi_graph.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
from pydantic import BaseModel
77
from .base_graph import BaseGraph
88
from .abstract_graph import AbstractGraph
9-
from .smart_scraper_graph import SmartScraperGraph
9+
from .scrape_graph import ScrapeGraph
1010
from ..nodes import (
1111
GraphIteratorNode,
12-
MergeAnswersNode
12+
MergeAnswersNode,
1313
)
1414
from ..utils.copy import safe_deepcopy
1515

1616
class SmartScraperMultiGraph(AbstractGraph):
1717
"""
1818
SmartScraperMultiGraph is a scraping pipeline that scrapes a
19-
list of URLs and generates answers to a given prompt.
19+
list of URLs, merges the content first, and finally generates answers to a given prompt.
2020
It only requires a user prompt and a list of URLs.
21+
The difference with the SmartScraperMultiGraph is that in this case the content is merged
22+
before being passed to the LLM.
2123
2224
Attributes:
2325
prompt (str): The user prompt to search the internet.
@@ -34,42 +36,41 @@ class SmartScraperMultiGraph(AbstractGraph):
3436
schema (Optional[BaseModel]): The schema for the graph output.
3537
3638
Example:
37-
>>> search_graph = MultipleSearchGraph(
38-
... "What is Chioggia famous for?",
39-
... {"llm": {"model": "openai/gpt-3.5-turbo"}}
39+
>>> smart_scraper_multi_graph = SmartScraperMultiGraph(
40+
... prompt="Who is Marco Perini?",
41+
... source= [
42+
... "https://perinim.github.io/",
43+
... "https://perinim.github.io/cv/"
44+
... ],
45+
... config={"llm": {"model": "openai/gpt-3.5-turbo"}}
4046
... )
41-
>>> result = search_graph.run()
47+
>>> result = smart_scraper_multi_graph.run()
4248
"""
4349

4450
def __init__(self, prompt: str, source: List[str],
4551
config: dict, schema: Optional[BaseModel] = None):
4652

47-
self.max_results = config.get("max_results", 3)
4853
self.copy_config = safe_deepcopy(config)
4954
self.copy_schema = deepcopy(schema)
50-
5155
super().__init__(prompt, config, source, schema)
5256

5357
def _create_graph(self) -> BaseGraph:
5458
"""
55-
Creates the graph of nodes representing the workflow for web scraping and searching.
56-
57-
Returns:
58-
BaseGraph: A graph instance representing the web scraping and searching workflow.
59+
Creates the graph of nodes representing the workflow for web scraping
60+
and parsing, then merges the content and generates answers to a given prompt.
5961
"""
60-
6162
graph_iterator_node = GraphIteratorNode(
6263
input="user_prompt & urls",
63-
output=["results"],
64+
output=["parsed_doc"],
6465
node_config={
65-
"graph_instance": SmartScraperGraph,
66+
"graph_instance": ScrapeGraph,
6667
"scraper_config": self.copy_config,
6768
},
6869
schema=self.copy_schema
6970
)
7071

7172
merge_answers_node = MergeAnswersNode(
72-
input="user_prompt & results",
73+
input="user_prompt & parsed_doc",
7374
output=["answer"],
7475
node_config={
7576
"llm_model": self.llm_model,
@@ -91,12 +92,12 @@ def _create_graph(self) -> BaseGraph:
9192

9293
def run(self) -> str:
9394
"""
94-
Executes the web scraping and searching process.
95+
Executes the web scraping and parsing process first and
96+
then concatenates the content and generates answers to a given prompt.
9597
9698
Returns:
9799
str: The answer to the prompt.
98100
"""
99101
inputs = {"user_prompt": self.prompt, "urls": self.source}
100102
self.final_state, self.execution_info = self.graph.execute(inputs)
101-
102103
return self.final_state.get("answer", "No answer found.")

0 commit comments

Comments
 (0)