Skip to content

Commit 4e19f89

Browse files
committed
README adapted
1 parent e40820d commit 4e19f89

File tree

4 files changed

+100
-27
lines changed

4 files changed

+100
-27
lines changed

README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ MCP_EXASOL_DATABASE_PASSWORD=<your encrypted user password>
228228
MCP_OPENAI_SERVER_URL=http://localhost:1234/v1
229229
MCP_OPENAI_SERVER_API_KEY=<API-Key of your LLM Server>
230230
MCP_OPENAI_SERVER_MODEL_NAME=<your selected model>
231-
MCP_VECTORDB_FILE=/Users/dirk/Temp/mcp_exasol_t2s.vectordb
231+
MCP_VECTORDB_FILE=<path-to-database-file-including-filename>
232232
MCP_VECTORDB_SIMILARITY_SEARCH_DISTANCE=0.3
233233
```
234234

@@ -239,7 +239,8 @@ mcp_exasol_passwords.py
239239
```
240240

241241
tool. For security reasons, keep this tool in a safe place and restrict the access to yourself only.
242-
242+
243+
243244
### Large Language Models to consider
244245

245246
For the transformation process, you can select any LLM which is known to code (specifically for SQL)
@@ -252,6 +253,22 @@ LLM to be utilized, you need to check, if the LLM is trained for tool usage. The
252253
the LLM features the higher is the performance requirement for a timely answer. A 70B parameter
253254
LLM on an Apple Macbook Pro with M4MAX and 40 GPU cores and *LM-Studio* or *ollama* is already
254255
consuming quite some time. Having a dedicated LLM server (on premise) is definitely a plus.
256+
257+
258+
### Please consider!
259+
260+
Large Language Models do not act like a human brain, basically they predict the next possible
261+
word based on a set of parameters (not to be confused with the connections between neurons of the neural
262+
network, a.k.a. parameters), e.g. temperature. You have to instruct them precisely about
263+
the task they have to solve. For many AI Desktop applications this is even valid for displaying
264+
the result set the text-to-sql option has created.
265+
266+
For example, with __Claude Desktop__ the following text helped to receive pure results without
267+
any commentary or other additional information. Consider it purely optional:
268+
269+
```
270+
Use text-to-sql tool; use only answer from tool to display result. Do not comment!
271+
```
255272

256273
## License
257274

exasol/ai/mcp/server/mcp_server.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def _register_tools(self):
187187
"ALWAYS use this tool for translation of natural language questions into SQL."
188188
"The tool always retrieves the metadata of the requested schema on its own."
189189
"Do not use other tools!"
190-
)
190+
),
191191
)
192192

193193
def _build_meta_query(
@@ -407,7 +407,10 @@ def execute_query(
407407
#################
408408

409409

410-
def text_to_sql(self, question: Annotated[str, Field(description="question")], db_schema: Annotated[str, Field(description="db_schema")], state: GraphState):
410+
def text_to_sql(self,
411+
question: Annotated[str, Field(description="question")],
412+
db_schema: Annotated[str, Field(description="db_schema")],
413+
state: GraphState | None) -> GraphState:
411414

412415
print(" ", file=sys.stderr)
413416
print(" ", file=sys.stderr)

exasol/ai/mcp/server/resources/prompt.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@ Use the following schema: {db_schema}:
2424

2525
Tables:
2626

27-
{schema}
27+
{schema}
28+
2829

exasol/ai/mcp/server/text_to_sql.py

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,22 @@
1414
from dotenv import load_dotenv
1515
from exasol.ai.mcp.server.load_prompt import load_prompt
1616
from exasol.ai.mcp.server.server_settings import ExaDbResult
17+
import json
1718
from langchain_core.prompts import ChatPromptTemplate
1819
from langchain_openai import ChatOpenAI
1920
from langgraph.graph import StateGraph, START, END
2021
import os
22+
import pprint
2123
from pydantic import BaseModel, Field
2224
import pyexasol
2325
from pyexasol import ExaError
26+
import re
2427
from sqlglot import exp, parse_one
2528
from sqlglot.errors import ParseError
2629
import sys
2730
from typing_extensions import TypedDict
2831

32+
import ssl
2933

3034
############################################################################
3135
## Get the user password stored encrypted on the desktop file system ##
@@ -141,6 +145,7 @@ def t2s_database_schema(db_schema: str, env: dict) -> str:
141145

142146
return schema_metadata
143147

148+
144149
##################################################################
145150
## Check if human question relates to requested database schema ##
146151
##################################################################
@@ -221,7 +226,8 @@ def t2s_human_language_to_sql(state: GraphState):
221226
system_prompt = load_prompt(db_schema=db_schema, schema=schema)
222227

223228
##
224-
## Check VectorDB for a similar question and SQL Statement
229+
## Check VectorDB for a similar question and SQL Statement,
230+
## retrieve a threshold for similarity from the .env file
225231
##
226232

227233
try:
@@ -328,7 +334,17 @@ def t2s_execute_query(state: GraphState):
328334
try:
329335
with pyexasol.connect(dsn=env['dsn'], user=env['db_user'], password=env['db_password'], schema=state['db_schema']) as c:
330336
rows = c.execute(state['sql_statement']).fetchall()
331-
#rows = c.export_to_pandas(state['sql_statement'])
337+
338+
cols = c.meta.sql_columns(state['sql_statement'])
339+
340+
col_names = tuple(cols.keys())
341+
342+
rows.insert(0, col_names)
343+
rslt = rows
344+
345+
print(f"#### End of SQL Execution-COLS {cols}", file=sys.stderr)
346+
print(f"#### End of SQL Execution-ROWS {rows}", file=sys.stderr)
347+
print(f"#### End of SQL Execution-TABLE {rslt}", file=sys.stderr)
332348

333349
state['query_result'] = str(ExaDbResult(rows))
334350
state['query_num_rows'] = c.last_statement().rowcount()
@@ -398,14 +414,51 @@ class DisplayResult(BaseModel):
398414
description="The result set converted into a nice and shiny table in MARKDOWN syntax."
399415
)
400416

417+
401418
def t2s_show_answer(state: GraphState):
402419

403420
env = get_environment()
404421

405-
state['display_result'] = state['query_result']
422+
result = re.search(r"(\[.*\])", state['query_result'])
423+
result_set = result.group(0)
424+
425+
system_prompt = f"""
426+
You are a helpful assistant formatting datasets. You will use Markdown syntax.
427+
Print the final result.
428+
"""
429+
430+
question = f"""Transform the dataset below into a table in markdown syntax:
431+
432+
{result_set}
433+
434+
"""
435+
436+
print(f"Show-Result-System-Prompt :: {system_prompt} \n {question}", file=sys.stderr)
437+
438+
#user_prompt = "" # Question: " + state['query_result'"]
439+
440+
llm = ChatOpenAI(model_name=env["llm_server_sql_transform"],
441+
temperature=0.0,
442+
openai_api_base=env["llm_server_url"],
443+
openai_api_key=env["llm_server_api_token"]).with_structured_output(DisplayResult)
444+
445+
446+
447+
t2s_prompt = ChatPromptTemplate.from_messages(
448+
[
449+
( "system", system_prompt),
450+
( "user", "Question: {question}" ),
451+
]
452+
)
453+
454+
render_process = t2s_prompt | llm
455+
result = render_process.invoke({"question": question})
456+
state["display_result"] = str(result.display_result)
457+
458+
406459

407460
print(f" ", file=sys.stderr)
408-
print(f"Show-Answer :: {state['query_result']}", file=sys.stderr)
461+
print(f"Show-Answer :: {result_set}", file=sys.stderr)
409462
print(f"Show-Answer-2 :: {state['display_result']}", file=sys.stderr)
410463
print(f" ", file=sys.stderr)
411464

@@ -546,6 +599,7 @@ async def start_t2s_process(state: GraphState):
546599

547600
workflow = StateGraph(GraphState)
548601

602+
workflow.add_edge(START, "check_relevance")
549603
workflow.add_node("check_relevance", t2s_check_relevance)
550604
workflow.add_node("transform_into_sql", t2s_human_language_to_sql)
551605
workflow.add_node("info_unable_query_type", t2s_info_unable_query_type)
@@ -558,8 +612,6 @@ async def start_t2s_process(state: GraphState):
558612
workflow.add_node("info_unable_create_sql", t2s_info_unable_create_sql)
559613
workflow.add_node("check_sql_valid", t2s_check_sql_valid)
560614

561-
workflow.add_edge(START, "check_relevance")
562-
563615
workflow.add_conditional_edges(
564616
"check_relevance",
565617
t2s_relevance_router,
@@ -568,17 +620,6 @@ async def start_t2s_process(state: GraphState):
568620
"NO": "info_query_not_relevant",
569621
},
570622
)
571-
workflow.add_edge("execute_query", "check_sql_valid")
572-
573-
workflow.add_conditional_edges(
574-
"check_sql_valid",
575-
t2s_sql_valid_router,
576-
{
577-
"YES": "show_answer",
578-
"NO": "check_max_tries"
579-
}
580-
)
581-
582623

583624
workflow.add_conditional_edges(
584625
"check_max_tries",
@@ -590,6 +631,8 @@ async def start_t2s_process(state: GraphState):
590631

591632
)
592633

634+
workflow.add_edge("transform_into_sql", "check_sql_is_allowed")
635+
593636
workflow.add_conditional_edges(
594637
"check_sql_is_allowed",
595638
t2s_check_sql_router,
@@ -599,15 +642,24 @@ async def start_t2s_process(state: GraphState):
599642
}
600643
)
601644

602-
workflow.add_edge("transform_into_sql", "check_sql_is_allowed")
603-
#workflow.add_edge("transform_into_sql", "execute_query")
645+
workflow.add_edge("execute_query", "check_sql_valid")
646+
647+
workflow.add_conditional_edges(
648+
"check_sql_valid",
649+
t2s_sql_valid_router,
650+
{
651+
"YES": "show_answer",
652+
"NO": "check_max_tries"
653+
}
654+
)
655+
656+
workflow.add_edge("show_answer", END)
657+
604658
workflow.add_edge("correct_query", "transform_into_sql")
605659
workflow.add_edge("info_query_not_relevant", END)
606660
workflow.add_edge("info_unable_create_sql", END)
607661

608-
## to be changed
609-
# workflow.add_edge("execute_query", "show_answer")
610-
workflow.add_edge("show_answer", END)
662+
611663

612664
t2s_process = workflow.compile()
613665

0 commit comments

Comments
 (0)