reasoning node refactoring

vedovati-matteo · vedovati-matteo · commit b7b3e9660f02 · 2024-09-26T18:33:22.000+02:00
diff --git a/scrapegraphai/nodes/reasoning_node.py b/scrapegraphai/nodes/reasoning_node.py
@@ -12,10 +12,15 @@
 from tqdm import tqdm
 from .base_node import BaseNode
 from ..utils import transform_schema
+from ..prompts import (
+    TEMPLATE_REASONING, TEMPLATE_REASONING_WITH_CONTEXT
+)
 
 class ReasoningNode(BaseNode):
     """
-    ...
+    A node that refines the user prompt with the use of the schema and additional context and
+    creates a precise prompt in subsequent steps that explicitly links elements in the user's
+    original input to their corresponding representations in the JSON schema.
 
     Attributes:
         llm_model: An instance of a language model client, configured for generating answers.
@@ -55,7 +60,7 @@ def __init__(
 
     def execute(self, state: dict) -> dict:
         """
-        ...
+        Generate a refined prompt for the reasoning task based on the user's input and the JSON schema.
 
         Args:
             state (dict): The current state of the graph. The input keys will be used
@@ -70,75 +75,6 @@ def execute(self, state: dict) -> dict:
         """
 
         self.logger.info(f"--- Executing {self.node_name} Node ---")
-
-        TEMPLATE_REASONING = """
-        **Task**: Analyze the user's request and the provided JSON schema to guide an LLM in extracting information directly from HTML.
-
-        **User's Request**:
-        {user_input}
-
-        **Target JSON Schema**:
-        ```json
-        {json_schema}
-        ```
-
-        **Analysis Instructions**:
-        1. **Interpret User Request:** 
-        * Identify the key information types or entities the user is seeking.
-        * Note any specific attributes, relationships, or constraints mentioned.
-
-        2. **Map to JSON Schema**:
-        * For each identified element in the user request, locate its corresponding field in the JSON schema.
-        * Explain how the schema structure represents the requested information.
-        * Highlight any relevant schema elements not explicitly mentioned in the user's request.
-
-        3. **Data Transformation Guidance**:
-        * Provide guidance on any necessary transformations to align extracted data with the JSON schema requirements.
-
-        This analysis will be used to instruct an LLM that has the HTML content in its context. The LLM will use this guidance to extract the information and return it directly in the specified JSON format.
-
-        **Reasoning Output**:
-        [Your detailed analysis based on the above instructions]
-        """
-                
-        TEMPLATE_REASONING_WITH_CONTEXT = """
-        **Task**: Analyze the user's request, provided JSON schema, and additional context to guide an LLM in extracting information directly from HTML.
-
-        **User's Request**:
-        {user_input}
-
-        **Target JSON Schema**:
-        ```json
-        {json_schema}
-        ```
-
-        **Additional Context**:
-        {additional_context}
-
-        **Analysis Instructions**:
-        1. **Interpret User Request and Context:** 
-        * Identify the key information types or entities the user is seeking.
-        * Note any specific attributes, relationships, or constraints mentioned.
-        * Incorporate insights from the additional context to refine understanding of the task.
-
-        2. **Map to JSON Schema**:
-        * For each identified element in the user request, locate its corresponding field in the JSON schema.
-        * Explain how the schema structure represents the requested information.
-        * Highlight any relevant schema elements not explicitly mentioned in the user's request.
-
-        3. **Extraction Strategy**:
-        * Based on the additional context, suggest specific strategies for locating and extracting the required information from the HTML.
-        * Highlight any potential challenges or special considerations mentioned in the context.
-
-        4. **Data Transformation Guidance**:
-        * Provide guidance on any necessary transformations to align extracted data with the JSON schema requirements.
-        * Note any special formatting, validation, or business logic considerations from the additional context.
-
-        This analysis will be used to instruct an LLM that has the HTML content in its context. The LLM will use this guidance to extract the information and return it directly in the specified JSON format.
-
-        **Reasoning Output**:
-        [Your detailed analysis based on the above instructions, incorporating insights from the additional context]
-        """
         
         user_prompt = state['user_prompt']
 
diff --git a/scrapegraphai/prompts/__init__.py b/scrapegraphai/prompts/__init__.py
@@ -18,4 +18,5 @@
                                          TEMPLATE_EXECUTION_ANALYSIS, TEMPLATE_EXECUTION_CODE_GENERATION,
                                          TEMPLATE_VALIDATION_ANALYSIS, TEMPLATE_VALIDATION_CODE_GENERATION,
                                          TEMPLATE_SEMANTIC_COMPARISON, TEMPLATE_SEMANTIC_ANALYSIS,
-                                         TEMPLATE_SEMANTIC_CODE_GENERATION)
+                                         TEMPLATE_SEMANTIC_CODE_GENERATION)
+from .reasoning_node_prompts import TEMPLATE_REASONING, TEMPLATE_REASONING_WITH_CONTEXT
diff --git a/scrapegraphai/prompts/reasoning_node_prompts.py b/scrapegraphai/prompts/reasoning_node_prompts.py
@@ -0,0 +1,72 @@
+"""
+Reasoning prompts helper
+"""
+
+TEMPLATE_REASONING = """
+**Task**: Analyze the user's request and the provided JSON schema to guide an LLM in extracting information directly from HTML.
+
+**User's Request**:
+{user_input}
+
+**Target JSON Schema**:
+```json
+{json_schema}
+```
+
+**Analysis Instructions**:
+1. **Interpret User Request:** 
+* Identify the key information types or entities the user is seeking.
+* Note any specific attributes, relationships, or constraints mentioned.
+
+2. **Map to JSON Schema**:
+* For each identified element in the user request, locate its corresponding field in the JSON schema.
+* Explain how the schema structure represents the requested information.
+* Highlight any relevant schema elements not explicitly mentioned in the user's request.
+
+3. **Data Transformation Guidance**:
+* Provide guidance on any necessary transformations to align extracted data with the JSON schema requirements.
+
+This analysis will be used to instruct an LLM that has the HTML content in its context. The LLM will use this guidance to extract the information and return it directly in the specified JSON format.
+
+**Reasoning Output**:
+[Your detailed analysis based on the above instructions]
+"""
+        
+TEMPLATE_REASONING_WITH_CONTEXT = """
+**Task**: Analyze the user's request, provided JSON schema, and additional context to guide an LLM in extracting information directly from HTML.
+
+**User's Request**:
+{user_input}
+
+**Target JSON Schema**:
+```json
+{json_schema}
+```
+
+**Additional Context**:
+{additional_context}
+
+**Analysis Instructions**:
+1. **Interpret User Request and Context:** 
+* Identify the key information types or entities the user is seeking.
+* Note any specific attributes, relationships, or constraints mentioned.
+* Incorporate insights from the additional context to refine understanding of the task.
+
+2. **Map to JSON Schema**:
+* For each identified element in the user request, locate its corresponding field in the JSON schema.
+* Explain how the schema structure represents the requested information.
+* Highlight any relevant schema elements not explicitly mentioned in the user's request.
+
+3. **Extraction Strategy**:
+* Based on the additional context, suggest specific strategies for locating and extracting the required information from the HTML.
+* Highlight any potential challenges or special considerations mentioned in the context.
+
+4. **Data Transformation Guidance**:
+* Provide guidance on any necessary transformations to align extracted data with the JSON schema requirements.
+* Note any special formatting, validation, or business logic considerations from the additional context.
+
+This analysis will be used to instruct an LLM that has the HTML content in its context. The LLM will use this guidance to extract the information and return it directly in the specified JSON format.
+
+**Reasoning Output**:
+[Your detailed analysis based on the above instructions, incorporating insights from the additional context]
+"""