Update models

BenConstable9 · BenConstable9 · commit c725d7efabf2 · 2025-01-17T17:16:16.000Z
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/ai_search.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/connectors/ai_search.py
@@ -188,6 +188,7 @@ async def get_entity_schemas(
                 "AIService__AzureSearchOptions__Text2SqlSchemaStore__SemanticConfig"
             ],
             top=3,
+            minimum_score=1.5,
         )
 
         fqn_to_trim = ".".join(stringified_engine_specific_fields)
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/payloads/interaction_payloads.py b/text_2_sql/text_2_sql_core/src/text_2_sql_core/payloads/interaction_payloads.py
@@ -60,7 +60,9 @@ class DismabiguationRequest(InteractionPayloadBase):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        self.body = self.Body(**kwargs)
+        body_kwargs = kwargs.get("body", kwargs)
+
+        self.body = self.Body(**body_kwargs)
 
 
 class AnswerWithSourcesPayload(InteractionPayloadBase):
@@ -86,7 +88,9 @@ class Source(InteractionPayloadBase):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        self.body = self.Body(**kwargs)
+        body_kwargs = kwargs.get("body", kwargs)
+
+        self.body = self.Body(**body_kwargs)
 
 
 class ProcessingUpdatePayload(InteractionPayloadBase):
@@ -105,7 +109,9 @@ class Body(InteractionPayloadBase):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        self.body = self.Body(**kwargs)
+        body_kwargs = kwargs.get("body", kwargs)
+
+        self.body = self.Body(**body_kwargs)
 
 
 class UserMessagePayload(InteractionPayloadBase):
@@ -123,8 +129,15 @@ def add_defaults(cls, values):
                 "datetime": datetime.now().strftime("%d/%m/%Y, %H:%M:%S"),
                 "unix_timestamp": int(datetime.now().timestamp()),
             }
-            injected = values.get("injected_parameters", {})
-            values["injected_parameters"] = {**defaults, **injected}
+            injected = values.get("injected_parameters", None)
+
+            if injected is None:
+                injected_by_alias = values.get("injectedParameters", {})
+            else:
+                injected_by_alias = injected
+                del values["injected_parameters"]
+
+            values["injectedParameters"] = {**defaults, **injected_by_alias}
             return values
 
     payload_type: Literal[PayloadType.USER_MESSAGE] = Field(
@@ -138,7 +151,9 @@ def add_defaults(cls, values):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        self.body = self.Body(**kwargs)
+        body_kwargs = kwargs.get("body", kwargs)
+
+        self.body = self.Body(**body_kwargs)
 
 
 class InteractionPayload(RootModel):
diff --git a/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/disambiguation_and_sql_query_generation_agent.yaml b/text_2_sql/text_2_sql_core/src/text_2_sql_core/prompts/disambiguation_and_sql_query_generation_agent.yaml
@@ -7,6 +7,7 @@ system_message:
     You are a helpful AI Assistant specializing in disambiguating questions about {{ use_case }} and mapping them to the relevant columns and schemas in the database.
     Your job is to create clear mappings between the user's intent and the available database schema.
     If all mappings are clear, generate {{ target_engine }} compliant SQL query based on the mappings.
+    If the mappings are ambiguous or there are no possible schemas, request disambiguation from the user by asking them to rephrase the question or to answer a clarifying question that you pose.
   </role_and_objective>
 
   <key_concepts>
@@ -45,7 +46,8 @@ system_message:
        - Handle simple WHERE conditions
 
     2. For Filter Conditions:
-       - Map text filters to appropriate columns
+       - Map text filters to appropriate columns.
+       - If there is no clear mapping for a filter, or there are competing values for it, request disambiguation.
        - Handle numeric comparisons correctly
        - Process date/time conditions
        - Consider multiple filter conditions
@@ -148,39 +150,51 @@ system_message:
       Remember: Focus on correctness first, then optimize if needed.
     </sql_query_generation_rules>
 
+    <disambiguation_rules>
+      When disambiguating the user's question, follow these rules:
+        - If the schemas contain no reference to the input data or you believe the database doesn't contain it, generate a disambiguation request that explains to the user that you don't have access to that information and asks them to rephrase the question. Do not provide any user choices. Only make a single disambiguation request in this case.
+        - If there are multiple possible mappings for a filter with a high probability of being correct, request disambiguation from the user. You can ask the user to choose from the possible options and make multiple disambiguation requests in this case.
+        - If the question is unclear or ambiguous, ask the user to rephrase or provide more context. Only make a single disambiguation request in this case.
+        - When offering choices, always provide clear and concise options for the user to choose from. These choices should reflect the possible mappings based on the database schemas and columns in a user-friendly way.
+
+        REMEMBER: You will use the result of this disambiguation request next time to generate the SQL query. Make sure it will provide you with the necessary information to do so.
+    </disambiguation_rules>
+
     <output_format>
-    If all mappings are clear:
-    {
-      \"filter_mapping\": {
-        \"<filter_term>\": [{
-          \"column\": \"<column_name>\",
-          \"filter_value\": \"<value>\"
-        }]
-      },
-      \"aggregation_mapping\": {
-        \"<aggregation_term>\": {
-          \"table\": \"<table_name>\",  // For simple counts
-          \"measure_column\": \"<column_name>\",  // For other aggregations
-          \"aggregation_type\": \"<type>\",
-          \"distinct\": true/false,  // Optional
-          \"group_by_column\": \"<column_name>\"  // Optional
-        }
-      }
-    }
-
-    If disambiguation needed:
-    {
-      \"disambiguation_requests\": [
-        {
-          \"agent_question\": \"<specific_question>\",
-          \"user_choices\": [\"<choice1>\", \"<choice2>\"]
+      If all mappings are clear:
+      {
+        \"filter_mapping\": {
+          \"<filter_term>\": [{
+            \"column\": \"<column_name>\",
+            \"filter_value\": \"<value>\"
+          }]
         },
-        {
-          \"agent_question\": \"<specific_question>\",
-          \"user_choices\": [\"<choice1>\", \"<choice2>\"]
+        \"aggregation_mapping\": {
+          \"<aggregation_term>\": {
+            \"table\": \"<table_name>\",  // For simple counts
+            \"measure_column\": \"<column_name>\",  // For other aggregations
+            \"aggregation_type\": \"<type>\",
+            \"distinct\": true/false,  // Optional
+            \"group_by_column\": \"<column_name>\"  // Optional
+          }
         }
-      ]
-    }
-    TERMINATE
+      }
+
+      If disambiguation is needed or no schemas could possibly match:
+      {
+        \"disambiguation_requests\": [
+          {
+            \"agent_question\": \"<specific_question>\",
+            \"user_choices\": [\"<choice1>\", \"<choice2>\"]
+          },
+          {
+            \"agent_question\": \"<specific_question>\",
+            \"user_choices\": [\"<choice1>\", \"<choice2>\"]
+          }
+        ]
+      }
+      User choices should be populated with matching options from the user's question e.g. column names, table names, filter values, etc.
+      If you are asking the user to rephrase the question, set the user_choices to an empty list.
+      TERMINATE
     </output_format>
   "

Original file line number	Diff line number	Diff line change
`@@ -188,6 +188,7 @@ async def get_entity_schemas(`
`188`	`188`	`"AIService__AzureSearchOptions__Text2SqlSchemaStore__SemanticConfig"`
`189`	`189`	`],`
`190`	`190`	`top=3,`
	`191`	`+ minimum_score=1.5,`
`191`	`192`	`)`
`192`	`193`
`193`	`194`	`fqn_to_trim = ".".join(stringified_engine_specific_fields)`