@@ -51,7 +51,8 @@ def __init__(
5151 False if node_config is None else node_config .get ("verbose" , False )
5252 )
5353 self .useSoup = (
54- True if node_config is None else node_config .get ("useSoup" , True )
54+ False if node_config is None else node_config .get ("useSoup" , False )
55+ )
5556 self .loader_kwargs = (
5657 {} if node_config is None else node_config .get ("loader_kwargs" , {})
5758 )
@@ -117,7 +118,7 @@ def execute(self, state):
117118 pass
118119
119120 elif not source .startswith ("http" ):
120- compressed_document = [Document (page_content = cleanup_html (source ),
121+ compressed_document = [Document (page_content = cleanup_html (data , source ),
121122 metadata = {"source" : "local_dir" }
122123 )]
123124
@@ -127,7 +128,7 @@ def execute(self, state):
127128 cleanedup_html = cleanup_html (response .text , source )
128129 compressed_document = [Document (page_content = cleanedup_html )]
129130 else :
130- print (f"Failed to retrieve contents from the webpage at url: { url } " )
131+ print (f"Failed to retrieve contents from the webpage at url: { source } " )
131132
132133 else :
133134 loader_kwargs = {}
@@ -139,7 +140,7 @@ def execute(self, state):
139140
140141 document = loader .load ()
141142 compressed_document = [
142- Document (page_content = cleanup_html (str (document [0 ].page_content )) )
143+ Document (page_content = cleanup_html (str (document [0 ].page_content ), source ), metadata = { "source" : source } )
143144 ]
144145
145146 state .update ({self .output [0 ]: compressed_document })
0 commit comments