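Update changes: move the AI Search functions into an AISearchHelper class and switch callers from module-level utils.sql functions to a SqlHelper instance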

BenConstable9 · BenConstable9 · commit 1e43911f63a6 · 2024-11-27T22:06:22.000Z
diff --git a/text_2_sql/autogen/agents/custom_agents/sql_query_cache_agent.py b/text_2_sql/autogen/agents/custom_agents/sql_query_cache_agent.py
@@ -6,7 +6,7 @@
 from autogen_agentchat.base import Response
 from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
 from autogen_core.base import CancellationToken
-from utils.sql import fetch_queries_from_cache
+from utils.sql import SqlHelper
 import json
 import logging
 
@@ -18,6 +18,8 @@ def __init__(self):
             "An agent that fetches the queries from the cache based on the user question.",
         )
 
+        self.sql_helper = SqlHelper()
+
     @property
     def produced_message_types(self) -> List[type[ChatMessage]]:
         return [TextMessage]
@@ -41,7 +43,7 @@ async def on_messages_stream(
         # Fetch the queries from the cache based on the user question.
         logging.info("Fetching queries from cache based on the user question...")
 
-        cached_queries = await fetch_queries_from_cache(user_question)
+        cached_queries = await self.sql_helper.fetch_queries_from_cache(user_question)
 
         yield Response(
             chat_message=TextMessage(
diff --git a/text_2_sql/autogen/utils/ai_search.py b/text_2_sql/autogen/utils/ai_search.py
@@ -12,121 +12,38 @@
 from datetime import datetime, timezone
 
 
-async def run_ai_search_query(
-    query,
-    vector_fields: list[str],
-    retrieval_fields: list[str],
-    index_name: str,
-    semantic_config: str,
-    top=5,
-    include_scores=False,
-    minimum_score: float = None,
-):
-    """Run the AI search query."""
-    identity_type = get_identity_type()
-
-    async with AsyncAzureOpenAI(
-        # This is the default and can be omitted
-        api_key=os.environ["OpenAI__ApiKey"],
-        azure_endpoint=os.environ["OpenAI__Endpoint"],
-        api_version=os.environ["OpenAI__ApiVersion"],
-    ) as open_ai_client:
-        embeddings = await open_ai_client.embeddings.create(
-            model=os.environ["OpenAI__EmbeddingModel"], input=query
-        )
-
-        # Extract the embedding vector
-        embedding_vector = embeddings.data[0].embedding
-
-    vector_query = VectorizedQuery(
-        vector=embedding_vector,
-        k_nearest_neighbors=7,
-        fields=",".join(vector_fields),
-    )
-
-    if identity_type == IdentityType.SYSTEM_ASSIGNED:
-        credential = DefaultAzureCredential()
-    elif identity_type == IdentityType.USER_ASSIGNED:
-        credential = DefaultAzureCredential(
-            managed_identity_client_id=os.environ["ClientID"]
-        )
-    else:
-        credential = AzureKeyCredential(
-            os.environ["AIService__AzureSearchOptions__Key"]
-        )
-    async with SearchClient(
-        endpoint=os.environ["AIService__AzureSearchOptions__Endpoint"],
-        index_name=index_name,
-        credential=credential,
-    ) as search_client:
-        results = await search_client.search(
-            top=top,
-            semantic_configuration_name=semantic_config,
-            search_text=query,
-            select=",".join(retrieval_fields),
-            vector_queries=[vector_query],
-            query_type="semantic",
-            query_language="en-GB",
-        )
-
-        combined_results = []
-
-        async for result in results.by_page():
-            async for item in result:
-                if (
-                    minimum_score is not None
-                    and item["@search.reranker_score"] < minimum_score
-                ):
-                    continue
-
-                if include_scores is False:
-                    del item["@search.reranker_score"]
-                    del item["@search.score"]
-                    del item["@search.highlights"]
-                    del item["@search.captions"]
-
-                logging.info("Item: %s", item)
-                combined_results.append(item)
+class AISearchHelper:
+    @staticmethod
+    async def run_ai_search_query(
+        query,
+        vector_fields: list[str],
+        retrieval_fields: list[str],
+        index_name: str,
+        semantic_config: str,
+        top=5,
+        include_scores=False,
+        minimum_score: float = None,
+    ):
+        """Run the AI search query."""
+        identity_type = get_identity_type()
 
-        logging.info("Results: %s", combined_results)
-
-    return combined_results
-
-
-async def add_entry_to_index(document: dict, vector_fields: dict, index_name: str):
-    """Add an entry to the search index."""
-
-    logging.info("Document: %s", document)
-    logging.info("Vector Fields: %s", vector_fields)
-
-    for field in vector_fields.keys():
-        if field not in document.keys():
-            logging.error(f"Field {field} is not in the document.")
-
-    identity_type = get_identity_type()
-
-    fields_to_embed = {field: document[field] for field in vector_fields}
-
-    document["DateLastModified"] = datetime.now(timezone.utc)
-
-    try:
         async with AsyncAzureOpenAI(
             # This is the default and can be omitted
             api_key=os.environ["OpenAI__ApiKey"],
             azure_endpoint=os.environ["OpenAI__Endpoint"],
             api_version=os.environ["OpenAI__ApiVersion"],
         ) as open_ai_client:
             embeddings = await open_ai_client.embeddings.create(
-                model=os.environ["OpenAI__EmbeddingModel"],
-                input=fields_to_embed.values(),
+                model=os.environ["OpenAI__EmbeddingModel"], input=query
             )
 
             # Extract the embedding vector
-            for i, field in enumerate(vector_fields.values()):
-                document[field] = embeddings.data[i].embedding
+            embedding_vector = embeddings.data[0].embedding
 
-        document["Id"] = base64.urlsafe_b64encode(document["Question"].encode()).decode(
-            "utf-8"
+        vector_query = VectorizedQuery(
+            vector=embedding_vector,
+            k_nearest_neighbors=7,
+            fields=",".join(vector_fields),
         )
 
         if identity_type == IdentityType.SYSTEM_ASSIGNED:
@@ -144,7 +61,92 @@ async def add_entry_to_index(document: dict, vector_fields: dict, index_name: st
             index_name=index_name,
             credential=credential,
         ) as search_client:
-            await search_client.upload_documents(documents=[document])
-    except Exception as e:
-        logging.error("Failed to add item to index.")
-        logging.error("Error: %s", e)
+            results = await search_client.search(
+                top=top,
+                semantic_configuration_name=semantic_config,
+                search_text=query,
+                select=",".join(retrieval_fields),
+                vector_queries=[vector_query],
+                query_type="semantic",
+                query_language="en-GB",
+            )
+
+            combined_results = []
+
+            async for result in results.by_page():
+                async for item in result:
+                    if (
+                        minimum_score is not None
+                        and item["@search.reranker_score"] < minimum_score
+                    ):
+                        continue
+
+                    if include_scores is False:
+                        del item["@search.reranker_score"]
+                        del item["@search.score"]
+                        del item["@search.highlights"]
+                        del item["@search.captions"]
+
+                    logging.info("Item: %s", item)
+                    combined_results.append(item)
+
+            logging.info("Results: %s", combined_results)
+
+        return combined_results
+
+    @staticmethod
+    async def add_entry_to_index(document: dict, vector_fields: dict, index_name: str):
+        """Add an entry to the search index."""
+
+        logging.info("Document: %s", document)
+        logging.info("Vector Fields: %s", vector_fields)
+
+        for field in vector_fields.keys():
+            if field not in document.keys():
+                logging.error(f"Field {field} is not in the document.")
+
+        identity_type = get_identity_type()
+
+        fields_to_embed = {field: document[field] for field in vector_fields}
+
+        document["DateLastModified"] = datetime.now(timezone.utc)
+
+        try:
+            async with AsyncAzureOpenAI(
+                # This is the default and can be omitted
+                api_key=os.environ["OpenAI__ApiKey"],
+                azure_endpoint=os.environ["OpenAI__Endpoint"],
+                api_version=os.environ["OpenAI__ApiVersion"],
+            ) as open_ai_client:
+                embeddings = await open_ai_client.embeddings.create(
+                    model=os.environ["OpenAI__EmbeddingModel"],
+                    input=fields_to_embed.values(),
+                )
+
+                # Extract the embedding vector
+                for i, field in enumerate(vector_fields.values()):
+                    document[field] = embeddings.data[i].embedding
+
+            document["Id"] = base64.urlsafe_b64encode(
+                document["Question"].encode()
+            ).decode("utf-8")
+
+            if identity_type == IdentityType.SYSTEM_ASSIGNED:
+                credential = DefaultAzureCredential()
+            elif identity_type == IdentityType.USER_ASSIGNED:
+                credential = DefaultAzureCredential(
+                    managed_identity_client_id=os.environ["ClientID"]
+                )
+            else:
+                credential = AzureKeyCredential(
+                    os.environ["AIService__AzureSearchOptions__Key"]
+                )
+            async with SearchClient(
+                endpoint=os.environ["AIService__AzureSearchOptions__Endpoint"],
+                index_name=index_name,
+                credential=credential,
+            ) as search_client:
+                await search_client.upload_documents(documents=[document])
+        except Exception as e:
+            logging.error("Failed to add item to index.")
+            logging.error("Error: %s", e)
diff --git a/text_2_sql/autogen/utils/llm_agent_creator.py b/text_2_sql/autogen/utils/llm_agent_creator.py
@@ -3,7 +3,7 @@
 import yaml
 from autogen_core.components.tools import FunctionTool
 from autogen_agentchat.agents import AssistantAgent
-from utils.sql import query_execution, get_entity_schemas, query_validation
+from utils.sql import SqlHelper
 from utils.models import MINI_MODEL
 from jinja2 import Template
 
@@ -24,20 +24,20 @@ def get_model(cls, model_name):
             raise ValueError(f"Model {model_name} not found")
 
     @classmethod
-    def get_tool(cls, tool_name):
+    def get_tool(cls, sql_helper, tool_name):
         if tool_name == "sql_query_execution_tool":
             return FunctionTool(
-                query_execution,
+                sql_helper.query_execution,
                 description="Runs an SQL query against the SQL Database to extract information",
             )
         elif tool_name == "sql_get_entity_schemas_tool":
             return FunctionTool(
-                get_entity_schemas,
+                sql_helper.get_entity_schemas,
                 description="Gets the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term. Extract key terms from the user question and use these as the search term. Several entities may be returned. Only use when the provided schemas in the system prompt are not sufficient to answer the question.",
             )
         elif tool_name == "sql_query_validation_tool":
             return FunctionTool(
-                query_validation,
+                sql_helper.query_validation,
                 description="Validates the SQL query to ensure that it is syntactically correct for the target database engine. Use this BEFORE executing any SQL statement.",
             )
         else:
@@ -55,10 +55,12 @@ def get_property_and_render_parameters(cls, agent_file, property, parameters):
     def create(cls, name: str, **kwargs):
         agent_file = cls.load_agent_file(name)
 
+        sql_helper = SqlHelper()
+
         tools = []
         if "tools" in agent_file and len(agent_file["tools"]) > 0:
             for tool in agent_file["tools"]:
-                tools.append(cls.get_tool(tool))
+                tools.append(cls.get_tool(sql_helper, tool))
 
         agent = AssistantAgent(
             name=name,
diff --git a/text_2_sql/autogen/utils/sql.py b/text_2_sql/autogen/utils/sql.py