`text_2_sql/GETTING_STARTED.md` (3 additions, 1 deletion)
```diff
@@ -5,7 +5,9 @@ To get started, perform the following steps:
 1. Setup Azure OpenAI in your subscription with **gpt-4o-mini** & an embedding model, alongside a SQL Server sample database, AI Search and a storage account.
 2. Clone this repository and deploy the AI Search text2sql indexes from `deploy_ai_search`.
 3. Run `uv sync` within the text_2_sql directory to install dependencies.
+   - Install the optional dependencies if you need a database connector other than TSQL.
+   - See the supported connectors in `text_2_sql_core/src/text_2_sql_core/connectors`.
 4. Create your `.env` file based on the provided sample `.env.example`. Place this file in the same location as the `.env.example`.
 5. Generate a data dictionary for your target server using the instructions in the **Running** section of `data_dictionary/README.md`.
-6. Upload these data dictionaries to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets.
+6. Upload the generated data dictionary files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets.
 7. Navigate to the `autogen` directory to view the AutoGen implementation. Follow the steps in `Iteration 5 - Agentic Vector Based Text2SQL.ipynb` to get started.
```
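Step 4 above only asks for a `.env` file placed next to `.env.example`. As a quick sanity check that the file is actually picked up, the snippet below is a minimal sketch: it assumes the settings are readable with `python-dotenv` and that the file lives in the `text_2_sql` directory, while the repository itself may wire up configuration differently.

```python
# Minimal sketch: confirm the .env placed next to .env.example is discoverable.
# Assumes python-dotenv and the text_2_sql directory; adjust the path to your layout.
from pathlib import Path

from dotenv import load_dotenv

env_path = Path("text_2_sql") / ".env"  # assumed location, alongside .env.example
if load_dotenv(env_path):
    print(f"Loaded settings from {env_path}")
else:
    print(f"No settings loaded from {env_path}; check that the file exists and is not empty")
```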
A further hunk in this diff updates the tool factory, replacing `FunctionToolAlias` with `FunctionTool`:

```diff
         tool_name (str): The name of the tool to retrieve.

     Returns:
-        FunctionToolAlias: The tool."""
+        FunctionTool: The tool."""

     if tool_name == "sql_query_execution_tool":
-        return FunctionToolAlias(
+        return FunctionTool(
             sql_helper.query_execution_with_limit,
             description="Runs an SQL query against the SQL Database to extract information",
         )
     elif tool_name == "sql_get_entity_schemas_tool":
-        return FunctionToolAlias(
+        return FunctionTool(
             sql_helper.get_entity_schemas,
             description="Gets the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term. Extract key terms from the user input and use these as the search term. Several entities may be returned. Only use when the provided schemas in the message history are not sufficient to answer the question.",
         )
     elif tool_name == "sql_get_column_values_tool":
-        return FunctionToolAlias(
+        return FunctionTool(
             sql_helper.get_column_values,
             description="Gets the values of a column in the SQL Database by selecting the most relevant entity based on the search term. Several entities may be returned. Use this to get the correct value to apply against a filter for a user's question.",
```
`text_2_sql/data_dictionary/README.md` (13 additions, 21 deletions)
```diff
@@ -207,10 +207,6 @@ This avoids having to index the fact tables, saving storage, and allows us to st

 ## Automatic Generation

-> [!IMPORTANT]
->
-> - The data dictionary generation scripts have been moved to `text_2_sql_core`. Documentation will be updated shortly.
-
 Manually creating the `entities.json` is a time-consuming exercise. To speed up generation, a mixture of SQL queries and an LLM can be used to generate an initial version. Existing comments and descriptions in the database can be combined with sample values to generate the necessary descriptions. Manual input can then be used to tweak it for the use case and any improvements.

 `./text_2_sql_core/data_dictionary/data_dictionary_creator.py` contains a utility class that handles the automatic generation and selection of schemas from the source SQL database. It must be subclassed to the appropriate engine to handle engine-specific queries and connection details.
```
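The exact interface of the utility class in `data_dictionary_creator.py` is not shown in this diff, so the following is only an illustrative sketch of what an engine-specific subclass might look like: the base class name, attribute, and method names are assumptions rather than the confirmed API.

```python
# Illustrative sketch only: the real base class lives in
# text_2_sql_core/data_dictionary/data_dictionary_creator.py, and its actual
# method/attribute names may differ from the assumptions used here.
from text_2_sql_core.data_dictionary.data_dictionary_creator import DataDictionaryCreator


class MyEngineDataDictionaryCreator(DataDictionaryCreator):
    """Hypothetical subclass for a database engine without a pre-built script."""

    # Engine-specific SQL for listing the tables and views to include in the dictionary.
    extract_entities_query = """
        SELECT table_schema, table_name, table_type
        FROM information_schema.tables
        WHERE table_type IN ('BASE TABLE', 'VIEW');
    """

    async def query_database(self, query: str) -> list[dict]:
        """Run a query with this engine's driver and return rows as dictionaries."""
        raise NotImplementedError("Wire up the engine-specific connection here.")
```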
```diff
@@ -222,28 +218,24 @@ The following Databases have pre-built scripts for them:

 If there is no pre-built script for your database engine, take one of the above as a starting point and adjust it.

 ## Running

-Fill out the `.env` template with connection details to your chosen database.
-
-Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment.
-
-`data_dictionary <DATABASE ENGINE>`
-
-You can pass the following command line arguments:
-
-- `--output_directory` or `-o`: Optional directory that the script will write the output files to.
-- `--single_file` or `-s`: Optional flag that writes all schemas to a single file.
-- `--generate_definitions` or `-gen`: Optional flag that uses OpenAI to generate descriptions.
-
-If you need control over the following, run the file directly:
-
-- `entities`: A list of entities to extract. Defaults to None.
-- `excluded_entities`: A list of entities to exclude.
-- `excluded_schemas`: A list of schemas to exclude.
+1. Create your `.env` file based on the provided sample `.env.example`. Place this file in the same location as the `.env.example`.
+2. Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent, or run from within a `uv` environment.
+3. Run `data_dictionary <DATABASE ENGINE>`.
+   - You can pass the following command line arguments:
+     - `--output_directory` or `-o`: Optional directory that the script will write the output files to.
+     - `--single_file` or `-s`: Optional flag that writes all schemas to a single file.
+     - `--generate_definitions` or `-gen`: Optional flag that uses OpenAI to generate descriptions.
+   - If you need control over the following, run the file directly:
+     - `entities`: A list of entities to extract. Defaults to None.
+     - `excluded_entities`: A list of entities to exclude.
+     - `excluded_schemas`: A list of schemas to exclude.
+4. Upload the generated data dictionary files to the relevant containers in your storage account. Wait for them to be automatically indexed with the included skillsets.
```
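Step 3 above covers the `data_dictionary` CLI; when you need the `entities`, `excluded_entities`, or `excluded_schemas` controls, the README says to run the file directly. The sketch below shows one way that might look, reusing the hypothetical subclass from the earlier sketch; only those three parameter names come from the README, while the class, module, and entry-point method are assumptions.

```python
# Sketch of running the generator directly for entity-level control.
# Only entities / excluded_entities / excluded_schemas are named in the README;
# the subclass, its module, and create_data_dictionary() are assumptions.
import asyncio

from my_engine_creator import MyEngineDataDictionaryCreator  # hypothetical module holding the sketch above


async def main() -> None:
    creator = MyEngineDataDictionaryCreator(
        entities=None,                          # None: extract every entity
        excluded_entities=["SalesStaging"],     # example tables/views to skip
        excluded_schemas=["audit"],             # example schemas to skip
    )
    await creator.create_data_dictionary()      # assumed entry point


if __name__ == "__main__":
    asyncio.run(main())
```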