Update prompts and payloads

BenConstable9 · BenConstable9 · commit 82ec6a93e94b · 2025-01-17T18:23:27.000Z
diff --git a/text_2_sql/autogen/src/autogen_text_2_sql/autogen_text_2_sql.py b/text_2_sql/autogen/src/autogen_text_2_sql/autogen_text_2_sql.py
@@ -15,7 +15,6 @@
 from autogen_agentchat.messages import TextMessage
 import json
 import os
-from datetime import datetime
 import re
 
 from text_2_sql_core.payloads.interaction_payloads import (
@@ -25,28 +24,33 @@
     ProcessingUpdatePayload,
     InteractionPayload,
     PayloadType,
+    DEFAULT_INJECTED_PARAMETERS,
 )
 from autogen_agentchat.base import TaskResult
 from typing import AsyncGenerator
 
 
 class AutoGenText2Sql:
-    def __init__(self, **kwargs: dict):
+    def __init__(self, **kwargs):
         self.target_engine = os.environ["Text2Sql__DatabaseEngine"].upper()
-        self.kwargs = kwargs
+
+        if "use_case" not in kwargs:
+            logging.warning(
+                "No use case provided. It is advised to provide a use case to help the LLM reason."
+            )
+
+        self.kwargs = {**DEFAULT_INJECTED_PARAMETERS, **kwargs}
 
     def get_all_agents(self):
         """Get all agents for the complete flow."""
-        # Get current datetime for the Query Rewrite Agent
-        current_datetime = datetime.now()
 
         self.user_message_rewrite_agent = LLMAgentCreator.create(
-            "user_message_rewrite_agent", current_datetime=current_datetime
+            "user_message_rewrite_agent", **self.kwargs
         )
 
         self.parallel_query_solving_agent = ParallelQuerySolvingAgent(**self.kwargs)
 
-        self.answer_agent = LLMAgentCreator.create("answer_agent")
+        self.answer_agent = LLMAgentCreator.create("answer_agent", **self.kwargs)
 
         agents = [
             self.user_message_rewrite_agent,
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/payloads/interaction_payloads.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/payloads/interaction_payloads.py
@@ -7,6 +7,13 @@
 from datetime import datetime, timezone
 from uuid import uuid4
 
+DEFAULT_INJECTED_PARAMETERS = {
+    "date": datetime.now().strftime("%d/%m/%Y"),
+    "time": datetime.now().strftime("%H:%M:%S"),
+    "datetime": datetime.now().strftime("%d/%m/%Y, %H:%M:%S"),
+    "unix_timestamp": int(datetime.now().timestamp()),
+}
+
 
 class PayloadSource(StrEnum):
     USER = "user"
@@ -123,12 +130,6 @@ class Body(InteractionPayloadBase):
 
         @model_validator(mode="before")
         def add_defaults(cls, values):
-            defaults = {
-                "date": datetime.now().strftime("%d/%m/%Y"),
-                "time": datetime.now().strftime("%H:%M:%S"),
-                "datetime": datetime.now().strftime("%d/%m/%Y, %H:%M:%S"),
-                "unix_timestamp": int(datetime.now().timestamp()),
-            }
             injected = values.get("injected_parameters", None)
 
             if injected is None:
@@ -137,7 +138,10 @@ def add_defaults(cls, values):
                 injected_by_alias = injected
                 del values["injected_parameters"]
 
-            values["injectedParameters"] = {**defaults, **injected_by_alias}
+            values["injectedParameters"] = {
+                **DEFAULT_INJECTED_PARAMETERS,
+                **injected_by_alias,
+            }
             return values
 
     payload_type: Literal[PayloadType.USER_MESSAGE] = Field(
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/disambiguation_and_sql_query_generation_agent.yaml b/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/disambiguation_and_sql_query_generation_agent.yaml
@@ -7,7 +7,7 @@ system_message:
     You are a helpful AI Assistant specializing in disambiguating questions about {{ use_case }} and mapping them to the relevant columns and schemas in the database.
     Your job is to create clear mappings between the user's intent and the available database schema.
     If all mappings are clear, generate {{ target_engine }} compliant SQL query based on the mappings.
-    If the mappings are ambiguous or there are no possible schemas, request disambiguation from the user by asking them to rephrase the question or to answer your own question.
+    If the mappings are ambiguous or there are no possible schemas, follow the disambiguation rules to request more information from the user.
   </role_and_objective>
 
   <key_concepts>
@@ -150,15 +150,92 @@ system_message:
       Remember: Focus on correctness first, then optimize if needed.
     </sql_query_generation_rules>
 
-    <dismabiguation_rules>
-      When disambiguating the user's question, follow these rules:
-        - If the schemas contain no reference to the input data or you believe the database doesn't contain it, generate a dismagiuation request that explains to the user you don't have access to that info, and requests that they should rephrase the question. Do not provide them any user choices. Only ask for a single disambiguation request for this case.
-        - If there are multiple possible mappings for a filter with a high probability of being correct, request disambiguation from the user. You can ask the user to choose from the possible options and answer multiple disambiguation requests in this case.
-        - If the question is unclear or ambiguous, ask the user to rephrase or provide more context. Only ask for a single disambiguation request for this case.
-        - Always provide clear and concise options for the user to choose from. These choices should reflect the possible mappings based on the database schemas and columns in a user friendly way.
+    <disambiguation_rules>
+      BEFORE CARRYING OUT DISAMBIGUATION, ENSURE THAT YOU HAVE CHECKED ALL AVAILABLE DATABASE SCHEMAS AND FILTERS FOR THE MOST PROBABLE MAPPING. YOU WILL NEED TO THINK THROUGH THE SCHEMAS AND CONSIDER SCHEMAS / COLUMNS THAT ARE SPELT DIFFERENTLY, BUT ARE LIKELY TO MEAN THE SAME THING.
+      ALWAYS PRIORITIZE CLEAR MAPPINGS OVER DISAMBIGUATION REQUESTS.
 
-        REMEMBER: You will use the result of this disambiguation request next time to generate the SQL query. Make sure it will provide you with the necessary information to do so.
-    </dismabiguation_rules>
+      1. **No Match in Database Schemas or Uncertain Schema Availability**:
+        - **Action**: If the database schemas or filters do not reference the user's question, or if you're unsure whether the schemas have the relevant data:
+          - Generate a single disambiguation request that includes an explanation directly in the question.
+          - The disambiguation question should explain that you believe the data is not available and request the user to rephrase their question or provide more context.
+          - **JSON Example**:
+            ```json
+            {
+              \"disambiguation_requests\": [
+                {
+                  \"agent_question\": \"I'm sorry, I couldn't find any relevant database schemas for your request about [REQUEST TYPE]. I focus on providing answers in the context of the use case. Could you please provide more context or rephrase your question?\",
+                  \"user_choices\": []
+                }
+              ]
+            }
+            ```
+
+      2. **Multiple Possible Mappings (when schemas or filters are available)**:
+        - **Action**: If there are multiple potential mappings for filters, column names, or table names that could match the user's question with high probability:
+          - Generate a disambiguation request with specific options for the user to choose from.
+          - **Important**: If there are multiple possible mappings for different aspects of the question (e.g., column names, table names, filters), **you may generate multiple disambiguation requests** to cover each possible ambiguity separately.
+          - The options should be derived from the database schema (e.g., column names, table names, or filter values) and reflect the user's input contextually.
+          - ONLY CARRY OUT THIS DISAMBIGUATION IF THERE ARE MULTIPLE MAPPINGS AND YOU HAVE NO MOST LIKELY MATCH. If you can reasonably determine the correct mapping, do not generate a disambiguation request. Sometimes the mapping is not explicitly stated in the user's question, but it can be inferred from the context, e.g. \"What is the average age of students?\" implies the column 'age' in the 'student' table, or that 2008 corresponds to the 'year' column in one of the tables.
+          - **Phrase the options in a user-friendly, human-readable way** without any prefixes like \"Option\".
+          - **JSON Example with Multiple Requests**:
+            ```json
+            {
+              \"disambiguation_requests\": [
+                {
+                  \"agent_question\": \"Did you mean the 'Customer Name' column or the 'Client Name' column?\",
+                  \"user_choices\": [
+                    \"Customer Name\",
+                    \"Client Name\"
+                  ]
+                },
+                {
+                  \"agent_question\": \"Which sort of bike do you mean?\",
+                  \"user_choices\": [
+                    \"Mountain Bike\",
+                    \"Road Bike\"
+                  ]
+                }
+              ]
+            }
+            ```
+
+      3. **Unclear or Ambiguous Question**:
+        - **Action**: If the user's question is unclear or inherently ambiguous (but relevant schemas are available):
+          - Generate a single disambiguation request asking the user to rephrase their question or provide more context.
+          - **JSON Example**:
+            ```json
+            {
+              \"disambiguation_requests\": [
+                {
+                  \"agent_question\": \"Could you please rephrase your question or provide more context? I'm having trouble understanding the specifics of your request.\",
+                  \"user_choices\": []
+                }
+              ]
+            }
+            ```
+
+      4. **General Guidance**:
+        - **Action**: If guidance is required but there are no specific ambiguous or multiple mappings:
+          - Generate a disambiguation request asking the user to clarify the details of their request.
+          - **JSON Example**:
+            ```json
+            {
+              \"disambiguation_requests\": [
+                {
+                  \"agent_question\": \"Could you clarify the details of your request so I can assist you better?\",
+                  \"user_choices\": []
+                }
+              ]
+            }
+            ```
+
+      ### Key Instructions for Implementing the Rules:
+        - **Always return the disambiguation request in JSON format** as specified in the examples.
+        - **Ensure that each disambiguation request includes a clear, concise explanation** and action the user should take (either provide more context or choose among options).
+        - **For multiple mappings, generate multiple disambiguation requests**: If there are multiple ambiguous aspects (e.g., columns, tables), create separate disambiguation requests for each one. This ensures the user can clearly identify and resolve each ambiguity step by step.
+        - **Phrase options in human-readable, natural language** without technical prefixes such as \"Option 1\" or \"Option 2\". This makes the options easier to understand.
+        - **Do not suggest options unless multiple potential mappings exist**, in which case, provide clearly derived options for the user to choose from.
+    </disambiguation_rules>
 
     <output_format>
       If all mappings are clear:
@@ -193,8 +270,6 @@ system_message:
           }
         ]
       }
-      User choices should be populated with matching options from the user's question e.g. column names, table names, filter values, etc.
-      If you are asking the user to rephrase the question, set the user_choices to an empty list.
       TERMINATE
     </output_format>
   "
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/user_message_rewrite_agent.yaml b/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/user_message_rewrite_agent.yaml
@@ -35,7 +35,7 @@ system_message: |
   <instructions>
       1. Question Filtering and Classification
         - Use the provided list of allowed_topics list to filter out malicious or unrelated queries, such as those in the disallowed_topics list.
-        - Ensure the question is relevant to the system's use case.
+        - Consider whether the question is related to data analysis or possibly related to {{ use_case }}. If you are not sure whether the question is related to the use case, do not filter it out, as it may be relevant.
         - If the question cannot be filtered, output an empty sub-message list in the JSON format. Followed by TERMINATE.
         - For non-database questions like greetings (e.g., "Hello", "What can you do?", "How are you?"), set "all_non_database_query" to true.
         - For questions about data (e.g., queries about records, counts, values, comparisons, or any questions that would require database access), set "all_non_database_query" to false.