
Commit b2d170c

refactored and fixed single chunk bug
1 parent 7b9a49c commit b2d170c


2 files changed: +23 -30 lines changed

scrapegraphai/graphs/script_creator_graph.py

Lines changed: 3 additions & 3 deletions
@@ -46,7 +46,7 @@ def _create_graph(self):
                 "embedder_model": self.embedder_model
             }
         )
-        generate_answer_node = GenerateScraperNode(
+        generate_scraper_node = GenerateScraperNode(
             input="user_prompt & (relevant_chunks | parsed_doc | doc)",
             output=["answer"],
             node_config={"llm": self.llm_model},
@@ -57,12 +57,12 @@ def _create_graph(self):
                 fetch_node,
                 parse_node,
                 rag_node,
-                generate_answer_node,
+                generate_scraper_node,
             },
             edges={
                 (fetch_node, parse_node),
                 (parse_node, rag_node),
-                (rag_node, generate_answer_node)
+                (rag_node, generate_scraper_node)
             },
             entry_point=fetch_node
         )

scrapegraphai/nodes/generate_scraper_node.py

Lines changed: 20 additions & 27 deletions
@@ -121,31 +121,28 @@ def execute(self, state):
 
         # Use tqdm to add progress bar
         for i, chunk in enumerate(tqdm(doc, desc="Processing chunks")):
-            if len(doc) == 1:
-                prompt = PromptTemplate(
-                    template=template_no_chunks,
-                    input_variables=["question"],
-                    partial_variables={"context": chunk.page_content,
-                                       "chunk_id": i + 1,
-                                       "format_instructions": format_instructions},
-                )
+            if len(doc) > 1:
+                template = template_chunks
             else:
-                prompt = PromptTemplate(
-                    template=template_chunks,
-                    input_variables=["question"],
-                    partial_variables={"context": chunk.page_content,
-                                       "chunk_id": i + 1,
-                                       "format_instructions": format_instructions},
-                )
+                template = template_no_chunks
+
+            prompt = PromptTemplate(
+                template=template,
+                input_variables=["question"],
+                partial_variables={"context": chunk.page_content,
+                                   "chunk_id": i + 1,
+                                   "format_instructions": format_instructions},
+            )
             # Dynamically name the chains based on their index
             chain_name = f"chunk{i+1}"
             chains_dict[chain_name] = prompt | self.llm_model | output_parser
 
+        # Use dictionary unpacking to pass the dynamically named chains to RunnableParallel
+        map_chain = RunnableParallel(**chains_dict)
+        # Chain
+        answer = map_chain.invoke({"question": user_prompt})
+
         if len(chains_dict) > 1:
-            # Use dictionary unpacking to pass the dynamically named chains to RunnableParallel
-            map_chain = RunnableParallel(**chains_dict)
-            # Chain
-            answer_map = map_chain.invoke({"question": user_prompt})
 
             # Merge the answers from the chunks
             merge_prompt = PromptTemplate(
@@ -155,11 +152,7 @@ def execute(self, state):
             )
             merge_chain = merge_prompt | self.llm_model | output_parser
             answer = merge_chain.invoke(
-                {"context": answer_map, "question": user_prompt})
-
-            # Update the state with the generated answer
-            state.update({self.output[0]: answer})
-            return state
-        else:
-            state.update({self.output[0]: chains_dict})
-            return state
+                {"context": answer, "question": user_prompt})
+
+        state.update({self.output[0]: answer})
+        return state
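
For context on the fix, the following is a minimal, self-contained sketch of the refactored control flow, not the node's actual implementation. It assumes langchain_core is installed; the fake llm, output_parser, the templates, and the generate() helper are illustrative stand-ins. The point it demonstrates: the parallel map is now always invoked, so a single-chunk document produces an answer (the one-entry dict from RunnableParallel) instead of the raw chains_dict, and the merge chain only runs when there is more than one chunk.

# Minimal sketch of the fixed flow; llm, output_parser, the templates and
# generate() are hypothetical stand-ins so the example runs without a model.
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel

llm = RunnableLambda(lambda p: f"LLM answer to: {p.to_string()[:60]}")  # fake LLM
output_parser = RunnableLambda(lambda text: text.strip())               # fake parser

template_no_chunks = "Answer '{question}' from this page:\n{context}\n{format_instructions}"
template_chunks = "Answer '{question}' from chunk {chunk_id}:\n{context}\n{format_instructions}"
merge_template = "Merge these partial answers into one: {context}\nQuestion: {question}"
format_instructions = "Return plain text."


def generate(doc, user_prompt):
    chains_dict = {}
    for i, chunk in enumerate(doc):
        # Pick the chunked template only when the document has more than one chunk
        template = template_chunks if len(doc) > 1 else template_no_chunks
        prompt = PromptTemplate(
            template=template,
            input_variables=["question"],
            partial_variables={"context": chunk,
                               "chunk_id": i + 1,
                               "format_instructions": format_instructions},
        )
        chains_dict[f"chunk{i + 1}"] = prompt | llm | output_parser

    # Always run the parallel map: with one chunk this still yields an answer
    # dict ({"chunk1": ...}) rather than leaving the raw chains_dict in state.
    map_chain = RunnableParallel(**chains_dict)
    answer = map_chain.invoke({"question": user_prompt})

    # Merge the per-chunk answers only when there are several of them
    if len(chains_dict) > 1:
        merge_prompt = PromptTemplate(
            template=merge_template,
            input_variables=["context", "question"],
        )
        merge_chain = merge_prompt | llm | output_parser
        answer = merge_chain.invoke({"context": answer, "question": user_prompt})
    return answer


print(generate(["only one chunk of page text"], "extract the page title"))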
