
Commit 6754f9e

Merge branch 'main' into feature/text2sql-modulisation

2 parents: 1e43911 + 2edb05c

27 files changed: +368 −514 lines

deploy_ai_search/.env

Lines changed: 1 addition & 0 deletions

```diff
@@ -15,6 +15,7 @@ StorageAccount__FQEndpoint=<Fully qualified endpoint in form ResourceId=resource
 StorageAccount__ConnectionString=<connectionString if using non managed identity. In format: DefaultEndpointsProtocol=https;AccountName=<STG NAME>;AccountKey=<ACCOUNT KEY>;EndpointSuffix=core.windows.net>
 StorageAccount__RagDocuments__Container=<containerName>
 StorageAccount__Text2SqlSchemaStore__Container=<containerName>
+StorageAccount__Text2SqlQueryCache__Container=<containerName>
 OpenAI__ApiKey=<openAIKey if using non managed identity>
 OpenAI__Endpoint=<openAIEndpoint>
 OpenAI__EmbeddingModel=<openAIEmbeddingModelName>
```
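For context, the deployment scripts presumably read these flat, double-underscore-named settings straight from the environment. A minimal sketch of consuming the new value, assuming the scripts load `.env` via `python-dotenv` (the variable name comes from the diff above; everything else is illustrative):

```python
import os

from dotenv import load_dotenv  # assumes python-dotenv is available

load_dotenv()

# The double-underscore names are flat environment variables, not nested config.
# This is the container that will hold the query cache JSON blobs.
query_cache_container = os.environ["StorageAccount__Text2SqlQueryCache__Container"]
print(f"Query cache blobs will be read from: {query_cache_container}")
```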

deploy_ai_search/README.md

Lines changed: 4 additions & 2 deletions

```diff
@@ -24,17 +24,19 @@ The associated scripts in this portion of the repository contains pre-built scri
    - `index_type text_2_sql_schema_store`. This selects the `Text2SQLSchemaStoreAISearch` sub class.
    - `rebuild`. Whether to delete and rebuild the index.
    - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version.
-   - `single_data_dictionary`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False.
+   - `single_data_dictionary_file`. Optional parameter that controls whether you will be uploading a single data dictionary, or a data dictionary file per entity. By default, this is set to False.
 
 ### Query Cache Index
 
 1. Update the `.env` file with the associated values. Not all values are required, depending on whether you are using System / User Assigned Identities or key-based authentication.
-2. Adjust `text_2_sql_query_cache.py` with any changes to the index. **There is no provided indexer or skillset for this cache; it is expected that application code will write directly to it. See the details in the Text2SQL README for different cache strategies.**
+2. Adjust `text_2_sql_query_cache.py` with any changes to the index. **An optional indexer and skillset are provided for this cache. You may instead want your application code to write directly to it. See the details in the Text2SQL README for different cache strategies.**
 3. Run `deploy.py` with the following args:
 
    - `index_type text_2_sql_query_cache`. This selects the `Text2SQLQueryCacheAISearch` sub class.
    - `rebuild`. Whether to delete and rebuild the index.
    - `suffix`. Optional parameter that will apply a suffix onto the deployed index and indexer. This is useful if you want to deploy a test version before overwriting the main version.
+   - `enable_query_cache_indexer`. Optional parameter that will enable the query cache indexer. Defaults to False.
+   - `single_query_cache_file`. Optional parameter that controls whether you will be uploading a single query cache file, or a cache file per question. By default, this is set to False.
 
 ## ai_search.py & environment.py
 
```
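To illustrate the "write directly to it" cache strategy mentioned in step 2, here is a hypothetical sketch using the `azure-search-documents` SDK. The field names mirror the index definition in `text_2_sql_query_cache.py` below; the endpoint, key, index name, and embedding are placeholders, not values from this commit:

```python
import base64

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

# Endpoint, key, and index name are placeholders: use your deployed service and
# the cache index name (plus any suffix) created by deploy.py.
search_client = SearchClient(
    endpoint="https://<search-service>.search.windows.net",
    index_name="text-2-sql-query-cache-index",
    credential=AzureKeyCredential("<admin-key>"),
)

question = "How many orders were placed last month?"

cache_entry = {
    # Approximates the indexer's base64Encode field mapping (Question -> Id) so
    # direct writes and indexer-ingested documents agree on document keys.
    "Id": base64.urlsafe_b64encode(question.encode()).decode(),
    "Question": question,
    "QuestionEmbedding": [0.0] * 1536,  # placeholder; supply your embedding model's output
    "SqlQueryDecomposition": [
        {
            "SubQuestion": "Count orders placed in the previous calendar month",
            "SqlQuery": "SELECT COUNT(*) AS OrderCount FROM Orders",
        }
    ],
}

search_client.merge_or_upload_documents(documents=[cache_entry])
```

If you mix both write paths, it is worth verifying the exact padding behavior of the service-side `base64Encode` mapping against keys produced this way.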

deploy_ai_search/deploy.py

Lines changed: 18 additions & 3 deletions

```diff
@@ -24,11 +24,14 @@ def deploy_config(arguments: argparse.Namespace):
         index_config = Text2SqlSchemaStoreAISearch(
             suffix=arguments.suffix,
             rebuild=arguments.rebuild,
-            single_data_dictionary=arguments.single_data_dictionary,
+            single_data_dictionary_file=arguments.single_data_dictionary_file,
         )
     elif arguments.index_type == "text_2_sql_query_cache":
         index_config = Text2SqlQueryCacheAISearch(
-            suffix=arguments.suffix, rebuild=arguments.rebuild
+            suffix=arguments.suffix,
+            rebuild=arguments.rebuild,
+            single_query_cache_file=arguments.single_query_cache_file,
+            enable_query_cache_indexer=arguments.enable_query_cache_indexer,
         )
     else:
         raise ValueError("Invalid Indexer Type")
@@ -60,11 +63,23 @@ def deploy_config(arguments: argparse.Namespace):
         help="Whether want to enable chunking by page in adi skill, if no value is passed considered False",
     )
     parser.add_argument(
-        "--single_data_dictionary",
+        "--single_data_dictionary_file",
         type=bool,
         required=False,
         help="Whether or not a single data dictionary file should be uploaded, or one per entity",
     )
+    parser.add_argument(
+        "--single_query_cache_file",
+        type=bool,
+        required=False,
+        help="Whether or not a single cache file should be uploaded, or one per question",
+    )
+    parser.add_argument(
+        "--enable_query_cache_indexer",
+        type=bool,
+        required=False,
+        help="Whether or not the sql query cache indexer should be enabled",
+    )
     parser.add_argument(
         "--suffix",
         type=str,
```
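One caveat worth flagging about the new flags (an editorial observation, not part of this commit): `argparse` with `type=bool` converts the raw string, and `bool("False")` is `True`, so `--single_query_cache_file False` would still evaluate truthy. A common workaround is a small string-to-bool converter, sketched here:

```python
import argparse


def str2bool(value: str) -> bool:
    """Parse common true/false spellings; plain bool() treats any non-empty string as True."""
    if value.lower() in ("yes", "true", "t", "1"):
        return True
    if value.lower() in ("no", "false", "f", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got {value!r}")


parser = argparse.ArgumentParser()
parser.add_argument(
    "--enable_query_cache_indexer",
    type=str2bool,
    default=False,
    required=False,
    help="Whether or not the sql query cache indexer should be enabled",
)

print(parser.parse_args(["--enable_query_cache_indexer", "False"]))
# Namespace(enable_query_cache_indexer=False)
```

Alternatively, `action="store_true"` avoids taking a value at all.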

deploy_ai_search/text_2_sql_query_cache.py

Lines changed: 127 additions & 3 deletions

```diff
@@ -5,12 +5,20 @@
     SearchFieldDataType,
     SearchField,
     SearchableField,
-    SimpleField,
-    ComplexField,
     SemanticField,
     SemanticPrioritizedFields,
     SemanticConfiguration,
     SemanticSearch,
+    SearchIndexer,
+    FieldMapping,
+    SimpleField,
+    ComplexField,
+    IndexingParameters,
+    IndexingParametersConfiguration,
+    BlobIndexerDataToExtract,
+    IndexerExecutionEnvironment,
+    BlobIndexerParsingMode,
+    FieldMappingFunction,
 )
 from ai_search import AISearch
 from environment import (
@@ -21,16 +29,30 @@
 class Text2SqlQueryCacheAISearch(AISearch):
     """This class is used to deploy the sql index."""
 
-    def __init__(self, suffix: str | None = None, rebuild: bool | None = False):
+    def __init__(
+        self,
+        suffix: str | None = None,
+        rebuild: bool | None = False,
+        single_query_cache_file: bool | None = False,
+        enable_query_cache_indexer: bool | None = False,
+    ):
         """Initialize the Text2SqlAISearch class. This class implements the deployment of the sql index.
 
         Args:
             suffix (str, optional): The suffix for the indexer. Defaults to None. If a suffix is provided, it is assumed to be a test indexer.
             rebuild (bool, optional): Whether to rebuild the index. Defaults to False.
+            single_query_cache_file (bool, optional): Whether to use a single cache file. Defaults to False. Only applies if the cache indexer is enabled.
+            enable_query_cache_indexer (bool, optional): Whether to enable the cache indexer. Defaults to False.
         """
         self.indexer_type = IndexerType.TEXT_2_SQL_QUERY_CACHE
+        self.enable_query_cache_indexer = enable_query_cache_indexer
         super().__init__(suffix, rebuild)
 
+        if single_query_cache_file:
+            self.parsing_mode = BlobIndexerParsingMode.JSON_ARRAY
+        else:
+            self.parsing_mode = BlobIndexerParsingMode.JSON
+
     def get_index_fields(self) -> list[SearchableField]:
         """This function returns the index fields for sql index.
 
@@ -56,6 +78,11 @@ def get_index_fields(self) -> list[SearchableField]:
             name="SqlQueryDecomposition",
             collection=True,
             fields=[
+                SearchableField(
+                    name="SubQuestion",
+                    type=SearchFieldDataType.String,
+                    filterable=True,
+                ),
                 SearchableField(
                     name="SqlQuery",
                     type=SearchFieldDataType.String,
@@ -130,3 +157,100 @@ def get_semantic_search(self) -> SemanticSearch:
         semantic_search = SemanticSearch(configurations=[semantic_config])
 
         return semantic_search
+
+    def get_skills(self) -> list:
+        """Get the skillset for the indexer.
+
+        Returns:
+            list: The skillsets used in the indexer"""
+
+        if self.enable_query_cache_indexer is False:
+            return []
+
+        embedding_skill = self.get_vector_skill(
+            "/document", "/document/Question", target_name="QuestionEmbedding"
+        )
+
+        skills = [embedding_skill]
+
+        return skills
+
+    def get_indexer(self) -> SearchIndexer:
+        """This function returns the indexer for sql.
+
+        Returns:
+            SearchIndexer: The indexer for sql"""
+
+        if self.enable_query_cache_indexer is False:
+            return None
+
+        # Only place on a schedule if it is not a test deployment
+        if self.test:
+            schedule = None
+            batch_size = 4
+        else:
+            schedule = {"interval": "PT24H"}
+            batch_size = 16
+
+        if self.environment.use_private_endpoint:
+            execution_environment = IndexerExecutionEnvironment.PRIVATE
+        else:
+            execution_environment = IndexerExecutionEnvironment.STANDARD
+
+        indexer_parameters = IndexingParameters(
+            batch_size=batch_size,
+            configuration=IndexingParametersConfiguration(
+                data_to_extract=BlobIndexerDataToExtract.CONTENT_AND_METADATA,
+                query_timeout=None,
+                execution_environment=execution_environment,
+                fail_on_unprocessable_document=False,
+                fail_on_unsupported_content_type=False,
+                index_storage_metadata_only_for_oversized_documents=True,
+                indexed_file_name_extensions=".json",
+                parsing_mode=self.parsing_mode,
+            ),
+            max_failed_items=5,
+        )
+
+        indexer = SearchIndexer(
+            name=self.indexer_name,
+            description="Indexer to index sql entities and generate embeddings",
+            skillset_name=self.skillset_name,
+            target_index_name=self.index_name,
+            data_source_name=self.data_source_name,
+            schedule=schedule,
+            field_mappings=[
+                FieldMapping(
+                    source_field_name="metadata_storage_last_modified",
+                    target_field_name="DateLastModified",
+                )
+            ],
+            output_field_mappings=[
+                FieldMapping(
+                    source_field_name="/document/Question",
+                    target_field_name="Id",
+                    mapping_function=FieldMappingFunction(
+                        name="base64Encode",
+                        parameters={"useHttpServerUtilityUrlTokenEncode": False},
+                    ),
+                ),
+                FieldMapping(
+                    source_field_name="/document/Question", target_field_name="Question"
+                ),
+                FieldMapping(
+                    source_field_name="/document/QuestionEmbedding",
+                    target_field_name="QuestionEmbedding",
+                ),
+                FieldMapping(
+                    source_field_name="/document/SqlQueryDecomposition",
+                    target_field_name="SqlQueryDecomposition",
+                ),
+                FieldMapping(
+                    source_field_name="/document/DateLastModified",
+                    target_field_name="DateLastModified",
+                ),
+            ],
+            parameters=indexer_parameters,
+        )
+
+        return indexer
```
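Given the `JSON` / `JSON_ARRAY` parsing modes and the field mappings above, a cache blob this indexer ingests would presumably look like the sketch below. The question and SQL are illustrative, not taken from the repository:

```python
import json

# One cache entry per blob when parsing_mode=JSON (the default). With
# single_query_cache_file=True (parsing_mode=JSON_ARRAY), one blob would
# instead hold a JSON array of these objects.
cache_entry = {
    "Question": "How many orders were placed last month?",
    "SqlQueryDecomposition": [
        {
            "SubQuestion": "Count orders placed in the previous calendar month",
            "SqlQuery": "SELECT COUNT(*) AS OrderCount FROM Orders",
        }
    ],
}

# "Id" and "QuestionEmbedding" are absent on purpose: the output field mappings
# derive Id by base64-encoding Question, the embedding skill generates
# QuestionEmbedding, and DateLastModified comes from blob storage metadata.
with open("example_cache_entry.json", "w") as f:
    json.dump(cache_entry, f, indent=2)
```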

deploy_ai_search/text_2_sql_schema_store.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -43,7 +43,7 @@ def __init__(
         self,
         suffix: str | None = None,
         rebuild: bool | None = False,
-        single_data_dictionary: bool | None = False,
+        single_data_dictionary_file: bool | None = False,
     ):
         """Initialize the Text2SqlAISearch class. This class implements the deployment of the sql index.
 
@@ -57,7 +57,7 @@ def __init__(
         ]
         super().__init__(suffix, rebuild)
 
-        if single_data_dictionary:
+        if single_data_dictionary_file:
             self.parsing_mode = BlobIndexerParsingMode.JSON_ARRAY
         else:
             self.parsing_mode = BlobIndexerParsingMode.JSON
```
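The renamed `single_data_dictionary_file` flag only switches the blob parsing mode, which implies two possible blob layouts. A sketch with hypothetical entity records (the actual data dictionary schema is defined elsewhere in the repository and is not part of this diff):

```python
import json

# Hypothetical entity records; field names are illustrative only.
entities = [
    {"Entity": "Orders", "Description": "Sales orders placed by customers."},
    {"Entity": "Customers", "Description": "Customer master data."},
]

# single_data_dictionary_file=True -> one blob, parsed as a JSON array.
with open("data_dictionary.json", "w") as f:
    json.dump(entities, f, indent=2)

# single_data_dictionary_file=False (default) -> one JSON blob per entity.
for entity in entities:
    with open(f"{entity['Entity'].lower()}.json", "w") as f:
        json.dump(entity, f, indent=2)
```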

text_2_sql/autogen/agentic_text_2_sql.ipynb renamed to text_2_sql/autogen/Iteration 5 - Agentic Vector Based Text2SQL.ipynb

File renamed without changes.

text_2_sql/autogen/README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -20,7 +20,7 @@ As the query cache is shared between users (no data is stored in the cache), a n
 
 ## Provided Notebooks & Scripts
 
-- `./agentic_text_2_sql.ipynb` provides an example of how to utilise the Agentic Vector Based Text2SQL approach to query the database. The query cache plugin will be enabled or disabled depending on the environmental parameters.
+- `./Iteration 5 - Agentic Vector Based Text2SQL.ipynb` provides an example of how to utilise the Agentic Vector Based Text2SQL approach to query the database. The query cache plugin will be enabled or disabled depending on the environmental parameters.
 
 ## Agents
 
```
text_2_sql/autogen/agentic_text_2_sql.py

Lines changed: 11 additions & 7 deletions

```diff
@@ -2,7 +2,7 @@
 # Licensed under the MIT License.
 from autogen_agentchat.task import TextMentionTermination, MaxMessageTermination
 from autogen_agentchat.teams import SelectorGroupChat
-from utils.models import MINI_MODEL
+from utils.llm_model_creator import LLMModelCreator
 from utils.llm_agent_creator import LLMAgentCreator
 import logging
 from agents.custom_agents.sql_query_cache_agent import SqlQueryCacheAgent
@@ -86,13 +86,17 @@ def selector(messages):
             and messages[-1].content is not None
         ):
             cache_result = json.loads(messages[-1].content)
-            if cache_result.get("cached_questions_and_schemas") is not None:
+            if cache_result.get(
+                "cached_questions_and_schemas"
+            ) is not None and cache_result.get("contains_pre_run_results"):
                 decision = "sql_query_correction_agent"
+            elif (
+                cache_result.get("cached_questions_and_schemas") is not None
+                and cache_result.get("contains_pre_run_results") is False
+            ):
+                decision = "sql_query_generation_agent"
             else:
-                decision = "sql_schema_selection_agent"
-
-        elif messages[-1].source == "sql_query_cache_agent":
-            decision = "question_decomposition_agent"
+                decision = "question_decomposition_agent"
 
         elif messages[-1].source == "question_decomposition_agent":
             decomposition_result = json.loads(messages[-1].content)
@@ -129,7 +133,7 @@ def agentic_flow(self):
         agentic_flow = SelectorGroupChat(
             self.agents,
             allow_repeated_speaker=False,
-            model_client=MINI_MODEL,
+            model_client=LLMModelCreator.get_model("4o-mini"),
             termination_condition=self.termination_condition,
             selector_func=AgenticText2Sql.selector,
         )
```
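`utils/llm_model_creator.py` itself is not included in this diff, so the following is purely a speculative sketch of the factory shape the new `LLMModelCreator.get_model("4o-mini")` call implies. The class name and method come from the diff; the client type, deployment variable, and API version are assumptions:

```python
import os

# Assumed import path for the autogen preview SDK this repo appears to use.
from autogen_ext.models import AzureOpenAIChatCompletionClient


class LLMModelCreator:
    """Speculative sketch: resolve short model keys like "4o-mini" to model clients."""

    @classmethod
    def get_model(cls, model_name: str) -> AzureOpenAIChatCompletionClient:
        if model_name == "4o-mini":
            return AzureOpenAIChatCompletionClient(
                model="gpt-4o-mini",
                azure_deployment=os.environ["OpenAI__MiniCompletionDeployment"],  # hypothetical variable
                azure_endpoint=os.environ["OpenAI__Endpoint"],
                api_key=os.environ["OpenAI__ApiKey"],
                api_version="2024-08-01-preview",  # assumed
            )
        raise ValueError(f"Unknown model key: {model_name}")
```

A factory like this would also explain the paired YAML changes below, where agent configs now reference the short key `4o-mini` rather than the full model name.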

text_2_sql/autogen/agents/llm_agents/answer_agent.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,5 +1,5 @@
 model:
-  gpt-4o-mini
+  4o-mini
 description:
   "An agent that takes the final results from the SQL query and writes the answer to the user's question"
 system_message:
```

text_2_sql/autogen/agents/llm_agents/question_decomposition_agent.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,5 +1,5 @@
 model:
-  gpt-4o-mini
+  4o-mini
 description:
   "An agent that will decompose the user's question into smaller parts to be used in the SQL queries. Use this agent when the user's question is too complex to be answered in one SQL query. Only use if the user's question is too complex to be answered in one SQL query."
 system_message:
```
