We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9661c77 commit 4233430 Copy full SHA for 4233430
scrapegraphai/nodes/fetch_node.py
@@ -6,6 +6,7 @@
6
from langchain_community.document_loaders import AsyncHtmlLoader
7
from langchain_core.documents import Document
8
from .base_node import BaseNode
9
+from ..utils.remover import remover
10
11
12
class FetchNode(BaseNode):
@@ -71,14 +72,13 @@ def execute(self, state):
71
72
73
# if it is a local directory
74
if not source.startswith("http"):
- document = [Document(page_content=source, metadata={
75
+ document = [Document(page_content=remover(source), metadata={
76
"source": "local_dir"
77
})]
78
79
# if it is a URL
80
else:
81
loader = AsyncHtmlLoader(source)
82
document = loader.load()
-
83
state.update({self.output[0]: document})
84
return state
0 commit comments