66from pydantic import BaseModel
77from .base_graph import BaseGraph
88from .abstract_graph import AbstractGraph
9- from ..nodes import ( FetchNode , ParseNode , SearchLinkNode )
9+ from ..nodes import (FetchNode ,
10+ SearchLinkNode ,
11+ SearchLinksWithContext )
1012
11- class SearchLinkGraph (AbstractGraph ):
13+ class SearchLinkGraph (AbstractGraph ):
1214 """
1315 SearchLinkGraph is a scraping pipeline that automates the process of
1416 extracting information from web pages using a natural language model
@@ -30,13 +32,7 @@ class SearchLinkGraph(AbstractGraph):
3032 config (dict): Configuration parameters for the graph.
3133 schema (BaseModel, optional): The schema for the graph output. Defaults to None.
3234
33- Example:
34- >>> smart_scraper = SearchLinkGraph(
35- ... "List me all the attractions in Chioggia.",
36- ... "https://en.wikipedia.org/wiki/Chioggia",
37- ... {"llm": {"model": "openai/gpt-3.5-turbo"}}
38- ... )
39- >>> result = smart_scraper.run()
35+
4036 """
4137
4238 def __init__ (self , source : str , config : dict , schema : Optional [BaseModel ] = None ):
@@ -51,28 +47,33 @@ def _create_graph(self) -> BaseGraph:
5147 Returns:
5248 BaseGraph: A graph instance representing the web scraping workflow.
5349 """
54-
5550 fetch_node = FetchNode (
56- input = "url| local_dir" ,
57- output = ["doc" ],
58- node_config = {
59- "llm_model" : self .llm_model ,
60- "force" : self .config .get ("force" , False ),
61- "cut" : self .config .get ("cut" , True ),
62- "loader_kwargs" : self .config .get ("loader_kwargs" , {}),
63- }
64- )
51+ input = "url| local_dir" ,
52+ output = ["doc" ],
53+ node_config = {
54+ "force" : self .config .get ("force" , False ),
55+ "cut" : self .config .get ("cut" , True ),
56+ "loader_kwargs" : self .config .get ("loader_kwargs" , {}),
57+ }
58+ )
6559
66- search_link_node = SearchLinkNode (
67- input = "doc" ,
68- output = ["parsed_doc" ],
69- node_config = {
70- "llm_model" : self .llm_model ,
71- "chunk_size" : self .model_token ,
72- "filter_links" : self .config .get ("filter_links" , None ),
73- "filter_config" : self .config .get ("filter_config" , None )
74- }
75- )
60+ if self .config .get ("llm_style") in (True , None ): # unset (None) or True selects the LLM-backed node; `== (True, None)` compared against a tuple and never matched
61+ search_link_node = SearchLinksWithContext (
62+ input = "doc" ,
63+ output = ["parsed_doc" ],
64+ node_config = {
65+ "llm_model" : self .llm_model ,
66+ "chunk_size" : self .model_token ,
67+ }
68+ )
69+ else :
70+ search_link_node = SearchLinkNode (
71+ input = "doc" ,
72+ output = ["parsed_doc" ],
73+ node_config = {
74+ "chunk_size" : self .model_token ,
75+ }
76+ )
7677
7778 return BaseGraph (
7879 nodes = [
0 commit comments