Skip to content

Commit 780b9b1

Browse files
committed
Update generated samples
1 parent a0ada39 commit 780b9b1

File tree

94 files changed

+8919
-181
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+8919
-181
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ repos:
2929
rev: v2.1.0
3030
hooks:
3131
- id: codespell
32-
args: [--exclude-file, uv.lock]
32+
args: [--skip, "uv.lock,*.json,*.jsonl"]
3333

3434
- repo: https://github.com/psf/black-pre-commit-mirror
3535
rev: 23.12.1
@@ -50,4 +50,4 @@ repos:
5050
rev: 0.5.5
5151
hooks:
5252
# Update the uv lockfile
53-
- id: uv-lock
53+
- id: uv-lock

deploy_ai_search/src/deploy_ai_search/README.md renamed to deploy_ai_search/README.md

File renamed without changes.

text_2_sql/autogen/Iteration 5 - Agentic Vector Based Text2SQL.ipynb

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
"source": [
5151
"import dotenv\n",
5252
"import logging\n",
53-
"from autogen_agentchat.task import Console\n",
53+
"from autogen_agentchat.ui import Console\n",
5454
"from autogen_text_2_sql.autogen_text_2_sql import AutoGenText2Sql"
5555
]
5656
},
@@ -85,7 +85,7 @@
8585
"metadata": {},
8686
"outputs": [],
8787
"source": [
88-
"agentic_text_2_sql = AutoGenText2Sql(target_engine=\"TSQL\", engine_specific_rules=\"Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.\").agentic_flow"
88+
"agentic_text_2_sql = AutoGenText2Sql(engine_specific_rules=\"Use TOP X to limit the number of rows returned instead of LIMIT X. NEVER USE LIMIT X as it produces a syntax error.\", use_case=\"Analysing sales data across product categories.\").agentic_flow"
8989
]
9090
},
9191
{
@@ -105,13 +105,6 @@
105105
"source": [
106106
"await Console(result)"
107107
]
108-
},
109-
{
110-
"cell_type": "code",
111-
"execution_count": null,
112-
"metadata": {},
113-
"outputs": [],
114-
"source": []
115108
}
116109
],
117110
"metadata": {

text_2_sql/autogen/pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ description = "AutoGen Based Implementation"
55
readme = "README.md"
66
requires-python = ">=3.12"
77
dependencies = [
8-
"autogen-agentchat==0.4.0.dev7",
9-
"autogen-core==0.4.0.dev7",
10-
"autogen-ext[azure,openai]==0.4.0.dev7",
8+
"autogen-agentchat==0.4.0.dev9",
9+
"autogen-core==0.4.0.dev9",
10+
"autogen-ext[azure,openai]==0.4.0.dev9",
1111
"grpcio>=1.68.1",
1212
"pyyaml>=6.0.2",
1313
"text_2_sql_core",

text_2_sql/autogen/src/autogen_text_2_sql/autogen_text_2_sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def agents(self):
7676
@property
7777
def termination_condition(self):
7878
"""Define the termination condition for the chat."""
79-
termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(10)
79+
termination = TextMentionTermination("TERMINATE") | MaxMessageTermination(15)
8080
return termination
8181

8282
@staticmethod

text_2_sql/autogen/src/autogen_text_2_sql/creators/llm_agent_creator.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def load_agent_file(cls, name: str) -> dict:
2424
return load(name.lower())
2525

2626
@classmethod
27-
def get_tool(cls, sql_helper, tool_name: str):
27+
def get_tool(cls, sql_helper, ai_search_helper, tool_name: str):
2828
"""Gets the tool based on the tool name.
2929
Args:
3030
----
@@ -41,9 +41,14 @@ def get_tool(cls, sql_helper, tool_name: str):
4141
)
4242
elif tool_name == "sql_get_entity_schemas_tool":
4343
return FunctionTool(
44-
sql_helper.get_entity_schemas,
44+
ai_search_helper.get_entity_schemas,
4545
description="Gets the schema of a view or table in the SQL Database by selecting the most relevant entity based on the search term. Extract key terms from the user question and use these as the search term. Several entities may be returned. Only use when the provided schemas in the system prompt are not sufficient to answer the question.",
4646
)
47+
elif tool_name == "sql_get_column_values_tool":
48+
return FunctionTool(
49+
ai_search_helper.get_column_values,
50+
description="Gets the values of a column in the SQL Database by selecting the most relevant entity based on the search term. Several entities may be returned. Use this to get the correct value to apply against a filter for a user's question.",
51+
)
4752
elif tool_name == "sql_query_validation_tool":
4853
return FunctionTool(
4954
sql_helper.query_validation,
@@ -93,11 +98,12 @@ def create(cls, name: str, **kwargs) -> AssistantAgent:
9398
agent_file = cls.load_agent_file(name)
9499

95100
sql_helper = ConnectorFactory.get_database_connector()
101+
ai_search_helper = ConnectorFactory.get_ai_search_connector()
96102

97103
tools = []
98104
if "tools" in agent_file and len(agent_file["tools"]) > 0:
99105
for tool in agent_file["tools"]:
100-
tools.append(cls.get_tool(sql_helper, tool))
106+
tools.append(cls.get_tool(sql_helper, ai_search_helper, tool))
101107

102108
agent = AssistantAgent(
103109
name=name,

text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/sql_query_cache_agent.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
from autogen_agentchat.agents import BaseChatAgent
66
from autogen_agentchat.base import Response
77
from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
8-
from autogen_core.base import CancellationToken
9-
from text_2_sql_core.connectors.sql import SqlConnector
8+
from autogen_core import CancellationToken
9+
from text_2_sql_core.connectors.factory import ConnectorFactory
1010
import json
1111
import logging
1212

@@ -18,7 +18,7 @@ def __init__(self):
1818
"An agent that fetches the queries from the cache based on the user question.",
1919
)
2020

21-
self.sql_helper = SqlConnector()
21+
self.sql_connector = ConnectorFactory.get_database_connector()
2222

2323
@property
2424
def produced_message_types(self) -> List[type[ChatMessage]]:
@@ -43,7 +43,9 @@ async def on_messages_stream(
4343
# Fetch the queries from the cache based on the user question.
4444
logging.info("Fetching queries from cache based on the user question...")
4545

46-
cached_queries = await self.sql_helper.fetch_queries_from_cache(user_question)
46+
cached_queries = await self.sql_connector.fetch_queries_from_cache(
47+
user_question
48+
)
4749

4850
yield Response(
4951
chat_message=TextMessage(

text_2_sql/autogen/src/autogen_text_2_sql/custom_agents/sql_schema_selection_agent.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,21 @@
55
from autogen_agentchat.agents import BaseChatAgent
66
from autogen_agentchat.base import Response
77
from autogen_agentchat.messages import AgentMessage, ChatMessage, TextMessage
8-
from autogen_core.base import CancellationToken
8+
from autogen_core import CancellationToken
99
from text_2_sql_core.connectors.sql import SqlConnector
1010
import json
1111
import logging
1212

1313

1414
class SqlQueryCacheAgent(BaseChatAgent):
15-
def __init__(self):
15+
def __init__(self, **kwargs):
1616
super().__init__(
17-
"sql_query_cache_agent",
18-
"An agent that fetches the queries from the cache based on the user question.",
17+
"sql_schema_selection_agent",
18+
"An agent that fetches the schemas from the cache based on the user question.",
1919
)
2020

21-
self.sql_helper = SqlConnector()
21+
self.kwargs = kwargs
22+
self.sql_connector = SqlConnector()
2223

2324
@property
2425
def produced_message_types(self) -> List[type[ChatMessage]]:
@@ -43,7 +44,9 @@ async def on_messages_stream(
4344
# Fetch the queries from the cache based on the user question.
4445
logging.info("Fetching queries from cache based on the user question...")
4546

46-
cached_queries = await self.sql_helper.fetch_queries_from_cache(user_question)
47+
cached_queries = await self.sql_connector.fetch_queries_from_cache(
48+
user_question
49+
)
4750

4851
yield Response(
4952
chat_message=TextMessage(

text_2_sql/data_dictionary/README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,13 @@ A full data dictionary must be built for all the views / tables you which to exp
8888

8989
## Indexing
9090

91-
`./deploy_ai_search/text_2_sql.py` & `./deploy_ai_search/text_2_sql_query_cache.py` contains the scripts to deploy and index the data dictionary for use within the plugin. See instructions in `./deploy_ai_search/README.md`.
91+
`./deploy_ai_search/text_2_sql.py` & `./deploy_ai_search/text_2_sql_query_cache.py` contain the scripts to deploy and index the data dictionary for use within the plugin. See instructions in `./deploy_ai_search/README.md`. There is currently **no automatic mechanism** to upload these .json files to a storage account; once generated, you must manually upload them to the appropriate storage account that the indexer is connected to.
9292

9393
## Automatic Generation
9494

9595
> [!IMPORTANT]
9696
>
97-
> - The data dictioonary generation scripts have been moved to `text_2_sql_core`. Documentation will be updated shortly.
97+
> - The data dictionary generation scripts have been moved to `text_2_sql_core`. Documentation will be updated shortly.
9898
9999
Manually creating the `entities.json` is a time-consuming exercise. To speed up generation, a mixture of SQL Queries and an LLM can be used to generate an initial version. Existing comments and descriptions in the database, can be combined with sample values to generate the necessary descriptions. Manual input can then be used to tweak it for the use case and any improvements.
100100

@@ -109,3 +109,20 @@ The following Databases have pre-built scripts for them:
109109
- **TSQL:** `./text_2_sql_core/data_dictionary/tsql_data_dictionary_creator.py`
110110

111111
If there is no pre-built script for your database engine, take one of the above as a starting point and adjust it.
112+
113+
## Running
114+
115+
Fill out the `.env` template with connection details to your chosen database.
116+
117+
Package and install the `text_2_sql_core` library. See [build](https://docs.astral.sh/uv/concepts/projects/build/) if you want to build as a wheel and install on an agent. Or you can run from within a `uv` environment.
118+
119+
`data_dictionary <DATABASE ENGINE>`
120+
121+
You can pass the following command-line arguments:
122+
123+
- `--output_directory` or `-o`: Optional directory that the script will write the output files to.
124+
- `--single_file` or `-s`: Optional flag that writes all schemas to a single file.
125+
126+
> [!IMPORTANT]
127+
>
128+
> - The data dictionary generation scripts will output column values for all possible filter clauses. This could lead to output of sensitive information. You should add exclusion criteria so that column values are only extracted for columns you actually want to filter by.

0 commit comments

Comments
 (0)