Commit de10b28

feat: update search_link_graph
1 parent 369332b commit de10b28

3 files changed, +31 -31 lines changed


scrapegraphai/graphs/search_link_graph.py

Lines changed: 30 additions & 29 deletions
```diff
@@ -6,9 +6,11 @@
 from pydantic import BaseModel
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
-from ..nodes import ( FetchNode, ParseNode, SearchLinkNode )
+from ..nodes import (FetchNode,
+                     SearchLinkNode,
+                     SearchLinksWithContext)
 
-class SearchLinkGraph(AbstractGraph):
+class SearchLinkGraph(AbstractGraph):
     """
     SearchLinkGraph is a scraping pipeline that automates the process of
     extracting information from web pages using a natural language model
@@ -30,13 +32,7 @@ class SearchLinkGraph(AbstractGraph):
         config (dict): Configuration parameters for the graph.
         schema (BaseModel, optional): The schema for the graph output. Defaults to None.
 
-    Example:
-        >>> smart_scraper = SearchLinkGraph(
-        ...     "List me all the attractions in Chioggia.",
-        ...     "https://en.wikipedia.org/wiki/Chioggia",
-        ...     {"llm": {"model": "openai/gpt-3.5-turbo"}}
-        ... )
-        >>> result = smart_scraper.run()
+
     """
 
     def __init__(self, source: str, config: dict, schema: Optional[BaseModel] = None):
@@ -51,28 +47,33 @@ def _create_graph(self) -> BaseGraph:
         Returns:
             BaseGraph: A graph instance representing the web scraping workflow.
         """
-
         fetch_node = FetchNode(
-            input="url| local_dir",
-            output=["doc"],
-            node_config={
-                "llm_model": self.llm_model,
-                "force": self.config.get("force", False),
-                "cut": self.config.get("cut", True),
-                "loader_kwargs": self.config.get("loader_kwargs", {}),
-            }
-        )
+            input="url| local_dir",
+            output=["doc"],
+            node_config={
+                "force": self.config.get("force", False),
+                "cut": self.config.get("cut", True),
+                "loader_kwargs": self.config.get("loader_kwargs", {}),
+            }
+        )
 
-        search_link_node = SearchLinkNode(
-            input="doc",
-            output=["parsed_doc"],
-            node_config={
-                "llm_model": self.llm_model,
-                "chunk_size": self.model_token,
-                "filter_links": self.config.get("filter_links", None),
-                "filter_config": self.config.get("filter_config", None)
-            }
-        )
+        if self.config.get("llm_style") == (True, None):
+            search_link_node = SearchLinksWithContext(
+                input="doc",
+                output=["parsed_doc"],
+                node_config={
+                    "llm_model": self.llm_model,
+                    "chunk_size": self.model_token,
+                }
+            )
+        else:
+            search_link_node = SearchLinkNode(
+                input="doc",
+                output=["parsed_doc"],
+                node_config={
+                    "chunk_size": self.model_token,
+                }
+            )
 
         return BaseGraph(
            nodes=[
```
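Since the commit drops the docstring example, a minimal usage sketch adapted to the new (source, config, schema) signature may be useful. The import path and model string are carried over from the deleted example, llm_style is the config key read by the new branch, and everything else is illustrative rather than part of the commit.

```python
from scrapegraphai.graphs import SearchLinkGraph  # assumed re-export path

# Sketch only: the new __init__ takes (source, config, schema), no prompt.
graph = SearchLinkGraph(
    source="https://en.wikipedia.org/wiki/Chioggia",
    config={
        "llm": {"model": "openai/gpt-3.5-turbo"},
        # As committed, SearchLinksWithContext is selected only when
        # config["llm_style"] == (True, None); any other value, or omitting
        # the key, falls through to the default SearchLinkNode branch.
        # "llm_style": (True, None),
    },
)
result = graph.run()  # as in the removed docstring example
```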

scrapegraphai/nodes/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -23,3 +23,4 @@
 from .fetch_screen_node import FetchScreenNode
 from .generate_answer_from_image_node import GenerateAnswerFromImageNode
 from .concat_answers_node import ConcatAnswersNode
+from .search_node_with_context import SearchLinksWithContext
```
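This re-export is what lets the grouped import at the top of search_link_graph.py resolve; a quick check, assuming the package layout shown in the diff:

```python
# The new class is importable alongside the existing nodes.
from scrapegraphai.nodes import FetchNode, SearchLinkNode, SearchLinksWithContext
```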

scrapegraphai/nodes/search_link_node.py

Lines changed: 0 additions & 2 deletions
```diff
@@ -40,8 +40,6 @@ def __init__(
     ):
         super().__init__(node_name, "node", input, output, 1, node_config)
 
-        self.llm_model = node_config["llm_model"]
-
         if node_config.get("filter_links", False) or "filter_config" in node_config:
             provided_filter_config = node_config.get("filter_config", {})
             self.filter_config = {**default_filters.filter_dict, **provided_filter_config}
```
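After this deletion, SearchLinkNode no longer expects an "llm_model" entry in node_config, matching the slimmed-down call in search_link_graph.py above. A construction sketch under that assumption; the chunk_size value is illustrative, and the filter keys behave as before:

```python
from scrapegraphai.nodes import SearchLinkNode

# node_config without "llm_model"; filter handling is unchanged, so passing
# filter_links / filter_config still merges over default_filters.filter_dict.
search_link_node = SearchLinkNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,    # illustrative; the graph passes self.model_token
        "filter_links": True,  # optional, as before
        "filter_config": {},   # overrides merged onto the default filter dict
    },
)
```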
