Skip to content

Commit 94d8042

Browse files
committed
rename smart_scraper_multi_graph to smart_scraper_multi_abstract_graph
1 parent 69ff649 commit 94d8042

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
"""
2+
SmartScraperMultiAbstractGraph Module
3+
"""
4+
from copy import deepcopy
5+
from typing import List, Optional
6+
from pydantic import BaseModel
7+
from .base_graph import BaseGraph
8+
from .abstract_graph import AbstractGraph
9+
from .smart_scraper_graph import SmartScraperGraph
10+
from ..nodes import (
11+
GraphIteratorNode,
12+
MergeAnswersNode
13+
)
14+
from ..utils.copy import safe_deepcopy
15+
16+
class SmartScraperMultiAbstractGraph(AbstractGraph):
    """
    Scraping pipeline that fans a single user prompt out over a list of URLs
    and merges the per-page results into one answer.

    Unlike SmartScraperMultiGraph, each page's content is first abstracted by
    the LLM; the abstracted results are then merged and passed to the LLM for
    the final answer.

    Attributes:
        prompt (str): The user prompt to search the internet.
        llm_model (dict): The configuration for the language model.
        embedder_model (dict): The configuration for the embedder model.
        headless (bool): A flag to run the browser in headless mode.
        verbose (bool): A flag to display the execution information.
        model_token (int): The token limit for the language model.

    Args:
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> smart_scraper_multi_abstract_graph = SmartScraperMultiAbstractGraph(
        ...     "What is Chioggia famous for?",
        ...     {"llm": {"model": "openai/gpt-3.5-turbo"}}
        ... )
        >>> result = smart_scraper_multi_abstract_graph.run()
    """

    def __init__(self, prompt: str, source: List[str],
                 config: dict, schema: Optional[BaseModel] = None):
        # Snapshot config/schema BEFORE the parent constructor runs, since
        # AbstractGraph.__init__ may trigger _create_graph, which reads them.
        self.max_results = config.get("max_results", 3)
        self.copy_config = safe_deepcopy(config)
        self.copy_schema = deepcopy(schema)

        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """
        Creates the graph of nodes representing the workflow for web scraping and searching.

        Returns:
            BaseGraph: A graph instance representing the web scraping and searching workflow.
        """
        # Each URL is scraped by its own SmartScraperGraph instance, driven
        # from the copied (not live) config so later mutations don't leak in.
        iterator_config = {
            "graph_instance": SmartScraperGraph,
            "scraper_config": self.copy_config,
        }
        iterator_node = GraphIteratorNode(
            input="user_prompt & urls",
            output=["results"],
            node_config=iterator_config,
            schema=self.copy_schema
        )

        # Collapse the per-URL results into a single answer via the LLM.
        merger_config = {
            "llm_model": self.llm_model,
            "schema": self.copy_schema
        }
        merger_node = MergeAnswersNode(
            input="user_prompt & results",
            output=["answer"],
            node_config=merger_config
        )

        workflow_nodes = [iterator_node, merger_node]
        workflow_edges = [(iterator_node, merger_node)]

        return BaseGraph(
            nodes=workflow_nodes,
            edges=workflow_edges,
            entry_point=iterator_node,
            graph_name=type(self).__name__
        )

    def run(self) -> str:
        """
        Executes the web scraping and searching process.

        Returns:
            str: The answer to the prompt.
        """
        initial_state = {"user_prompt": self.prompt, "urls": self.source}
        self.final_state, self.execution_info = self.graph.execute(initial_state)

        # Fall back to a fixed message when no "answer" key was produced.
        return self.final_state.get("answer", "No answer found.")

0 commit comments

Comments
 (0)