Skip to content

Commit 4233430

Browse files
committed
add integration on the fetch node
1 parent 9661c77 commit 4233430

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

scrapegraphai/nodes/fetch_node.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
6 6
from langchain_community.document_loaders import AsyncHtmlLoader
7 7
from langchain_core.documents import Document
8 8
from .base_node import BaseNode
9 +
from ..utils.remover import remover
9 10

10 11

11 12
class FetchNode(BaseNode):
@@ -71,14 +72,13 @@ def execute(self, state):
71 72

72 73
# if it is a local directory
73 74
if not source.startswith("http"):
74 -
document = [Document(page_content=source, metadata={
75 +
document = [Document(page_content=remover(source), metadata={
75 76
"source": "local_dir"
76 77
})]
77 78

78 79
# if it is a URL
79 80
else:
80 81
loader = AsyncHtmlLoader(source)
81 82
document = loader.load()
82 -
83 83
state.update({self.output[0]: document})
84 84
return state

0 commit comments

Comments
 (0)