From 79c8046711886730ed1c3f5531aa00cac21a824e Mon Sep 17 00:00:00 2001 From: flst01 <168219861+flst01@users.noreply.github.com> Date: Thu, 8 May 2025 02:10:55 +0200 Subject: [PATCH 1/2] Fix Burr integration by updating fetch_node.py. Before this change, the Burr integration would cause an error in fetch: ValueError: Action Fetch attempted to write to keys {'original_html'} that it did not declare. It declared: (['doc'])! --- scrapegraphai/nodes/fetch_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index ec202f3f..2637a70a 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -356,7 +356,7 @@ def handle_web_source(self, state, source): compressed_document = [ Document(page_content=parsed_content, metadata={"source": "html file"}) ] - state["original_html"] = document + state["doc"] = document state.update( { self.output[0]: compressed_document, From e660914994d900eb43bdc80d67ba31f6ed241fd5 Mon Sep 17 00:00:00 2001 From: flst01 <168219861+flst01@users.noreply.github.com> Date: Thu, 8 May 2025 02:30:39 +0200 Subject: [PATCH 2/2] Fix Burr integration in ParseNode by updating parse_node.py. Before this change, using the Burr integration in SmartScraperGraph resulted in an error: ValueError: Action ParseNode attempted to write to keys {'content'} that it did not declare. It declared: (['parsed_doc'])! --- scrapegraphai/nodes/parse_node.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index 1c409da2..44cd5896 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -121,7 +121,6 @@ def execute(self, state: dict) -> dict: state.update({self.output[0]: chunks}) state.update({"parsed_doc": chunks}) - state.update({"content": chunks}) if self.parse_urls: state.update({self.output[1]: link_urls})