Skip to content

Commit 4e19f89

Browse files
committed
README adapted
1 parent e40820d commit 4e19f89

File tree

4 files changed

+100
-27
lines changed

4 files changed

+100
-27
lines changed

README.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ MCP_EXASOL_DATABASE_PASSWORD=<your encrypted user password>
228228
MCP_OPENAI_SERVER_URL=http://localhost:1234/v1
229229
MCP_OPENAI_SERVER_API_KEY=<API-Key of your LLM Server>
230230
MCP_OPENAI_SERVER_MODEL_NAME=<your selected model>
231-
MCP_VECTORDB_FILE=/Users/dirk/Temp/mcp_exasol_t2s.vectordb
231+
MCP_VECTORDB_FILE=<path-to-database-file-including-filename>
232232
MCP_VECTORDB_SIMILARITY_SEARCH_DISTANCE=0.3
233233
```
234234

@@ -239,7 +239,8 @@ mcp_exasol_passwords.py
239239
```
240240

241241
tool. For security reasons, keep this tool in a safe place and restrict the access to yourself only.
242-
242+
243+
243244
### Large Language Models to consider
244245

245246
For the transformation process, you can select any LLM which is known to code (specifically for SQL)
@@ -252,6 +253,22 @@ LLM to be utilized, you need to check, if the LLM is trained for tool usage. The
252253
the LLM features the higher is the performance requirement for a timely answer. A 70B parameter
253254
LLM on an Apple Macbook Pro with M4MAX and 40 GPU cores and *LM-Studio* or *ollama* is already
254255
consuming quite some time. Having a dedicated LLM server (on premise) is definitely a plus.
256+
257+
258+
### Please consider!
259+
260+
Large Language Models do not act like a human brain, basically they predict the next possible
261+
word based on a set of parameters (not to be confused with the connections between neurons of the neural
262+
network, a.k.a. parameters), e.g. temperature. You have to instruct them precisely about
263+
the task they have to solve. For many AI Desktop applications this is even valid for displaying
264+
the result set the text-to-sql option has created.
265+
266+
For example, with __Claude Desktop__ the following text helped to receive pure results without
267+
any commentary or other additional information. Consider it purely optional:
268+
269+
```
270+
Use text-to-sql tool; use only answer from tool to display result. Do not comment!
271+
```
255272

256273
## License
257274

exasol/ai/mcp/server/mcp_server.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def _register_tools(self):
187187
"ALWAYS use this tool for translation of natural language questions into SQL."
188188
"The tool always retrieves the metadata of the requested schema on its own."
189189
"Do not use other tools!"
190-
)
190+
),
191191
)
192192

193193
def _build_meta_query(
@@ -407,7 +407,10 @@ def execute_query(
407407
#################
408408

409409

410-
def text_to_sql(self, question: Annotated[str, Field(description="question")], db_schema: Annotated[str, Field(description="db_schema")], state: GraphState):
410+
def text_to_sql(self,
411+
question: Annotated[str, Field(description="question")],
412+
db_schema: Annotated[str, Field(description="db_schema")],
413+
state: GraphState | None) -> GraphState:
411414

412415
print(" ", file=sys.stderr)
413416
print(" ", file=sys.stderr)

exasol/ai/mcp/server/resources/prompt.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@ Use the following schema: {db_schema}:
2424

2525
Tables:
2626

27-
{schema}
27+
{schema}
28+
2829

exasol/ai/mcp/server/text_to_sql.py

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,22 @@
1414
from dotenv import load_dotenv
1515
from exasol.ai.mcp.server.load_prompt import load_prompt
1616
from exasol.ai.mcp.server.server_settings import ExaDbResult
17+
import json
1718
from langchain_core.prompts import ChatPromptTemplate
1819
from langchain_openai import ChatOpenAI
1920
from langgraph.graph import StateGraph, START, END
2021
import os
22+
import pprint
2123
from pydantic import BaseModel, Field
2224
import pyexasol
2325
from pyexasol import ExaError
26+
import re
2427
from sqlglot import exp, parse_one
2528
from sqlglot.errors import ParseError
2629
import sys
2730
from typing_extensions import TypedDict
2831

32+
import ssl
2933

3034
############################################################################
3135
## Get the user password stored encrypted on the desktop file system ##
@@ -141,6 +145,7 @@ def t2s_database_schema(db_schema: str, env: dict) -> str:
141145

142146
return schema_metadata
143147

148+
144149
##################################################################
145150
## Check if human question relates to requested database schema ##
146151
##################################################################
@@ -221,7 +226,8 @@ def t2s_human_language_to_sql(state: GraphState):
221226
system_prompt = load_prompt(db_schema=db_schema, schema=schema)
222227

223228
##
224-
## Check VectorDB for a similar question and SQL Statement
229+
## Check VectorDB for a similar question and SQL Statement,
230+
## retrieve a threshold for similarity from the .env file
225231
##
226232

227233
try:
@@ -328,7 +334,17 @@ def t2s_execute_query(state: GraphState):
328334
try:
329335
with pyexasol.connect(dsn=env['dsn'], user=env['db_user'], password=env['db_password'], schema=state['db_schema']) as c:
330336
rows = c.execute(state['sql_statement']).fetchall()
331-
#rows = c.export_to_pandas(state['sql_statement'])
337+
338+
cols = c.meta.sql_columns(state['sql_statement'])
339+
340+
col_names = tuple(cols.keys())
341+
342+
rows.insert(0, col_names)
343+
rslt = rows
344+
345+
print(f"#### End of SQL Execution-COLS {cols}", file=sys.stderr)
346+
print(f"#### End of SQL Execution-ROWS {rows}", file=sys.stderr)
347+
print(f"#### End of SQL Execution-TABLE {rslt}", file=sys.stderr)
332348

333349
state['query_result'] = str(ExaDbResult(rows))
334350
state['query_num_rows'] = c.last_statement().rowcount()
@@ -398,14 +414,51 @@ class DisplayResult(BaseModel):
398414
description="The result set converted into a nice and shiny table in MARKDOWN syntax."
399415
)
400416

417+
401418
def t2s_show_answer(state: GraphState):
402419

403420
env = get_environment()
404421

405-
state['display_result'] = state['query_result']
422+
result = re.search(r"(\[.*\])", state['query_result'])
423+
result_set = result.group(0)
424+
425+
system_prompt = f"""
426+
You are a helpful assistant formatting datasets. You will use Markdown syntax.
427+
Print the final result.
428+
"""
429+
430+
question = f"""Transform the dataset below into a table in markdown syntax:
431+
432+
{result_set}
433+
434+
"""
435+
436+
print(f"Show-Result-System-Prompt :: {system_prompt} \n {question}", file=sys.stderr)
437+
438+
#user_prompt = "" # Question: " + state['query_result'"]
439+
440+
llm = ChatOpenAI(model_name=env["llm_server_sql_transform"],
441+
temperature=0.0,
442+
openai_api_base=env["llm_server_url"],
443+
openai_api_key=env["llm_server_api_token"]).with_structured_output(DisplayResult)
444+
445+
446+
447+
t2s_prompt = ChatPromptTemplate.from_messages(
448+
[
449+
( "system", system_prompt),
450+
( "user", "Question: {question}" ),
451+
]
452+
)
453+
454+
render_process = t2s_prompt | llm
455+
result = render_process.invoke({"question": question})
456+
state["display_result"] = str(result.display_result)
457+
458+
406459

407460
print(f" ", file=sys.stderr)
408-
print(f"Show-Answer :: {state['query_result']}", file=sys.stderr)
461+
print(f"Show-Answer :: {result_set}", file=sys.stderr)
409462
print(f"Show-Answer-2 :: {state['display_result']}", file=sys.stderr)
410463
print(f" ", file=sys.stderr)
411464

@@ -546,6 +599,7 @@ async def start_t2s_process(state: GraphState):
546599

547600
workflow = StateGraph(GraphState)
548601

602+
workflow.add_edge(START, "check_relevance")
549603
workflow.add_node("check_relevance", t2s_check_relevance)
550604
workflow.add_node("transform_into_sql", t2s_human_language_to_sql)
551605
workflow.add_node("info_unable_query_type", t2s_info_unable_query_type)
@@ -558,8 +612,6 @@ async def start_t2s_process(state: GraphState):
558612
workflow.add_node("info_unable_create_sql", t2s_info_unable_create_sql)
559613
workflow.add_node("check_sql_valid", t2s_check_sql_valid)
560614

561-
workflow.add_edge(START, "check_relevance")
562-
563615
workflow.add_conditional_edges(
564616
"check_relevance",
565617
t2s_relevance_router,
@@ -568,17 +620,6 @@ async def start_t2s_process(state: GraphState):
568620
"NO": "info_query_not_relevant",
569621
},
570622
)
571-
workflow.add_edge("execute_query", "check_sql_valid")
572-
573-
workflow.add_conditional_edges(
574-
"check_sql_valid",
575-
t2s_sql_valid_router,
576-
{
577-
"YES": "show_answer",
578-
"NO": "check_max_tries"
579-
}
580-
)
581-
582623

583624
workflow.add_conditional_edges(
584625
"check_max_tries",
@@ -590,6 +631,8 @@ async def start_t2s_process(state: GraphState):
590631

591632
)
592633

634+
workflow.add_edge("transform_into_sql", "check_sql_is_allowed")
635+
593636
workflow.add_conditional_edges(
594637
"check_sql_is_allowed",
595638
t2s_check_sql_router,
@@ -599,15 +642,24 @@ async def start_t2s_process(state: GraphState):
599642
}
600643
)
601644

602-
workflow.add_edge("transform_into_sql", "check_sql_is_allowed")
603-
#workflow.add_edge("transform_into_sql", "execute_query")
645+
workflow.add_edge("execute_query", "check_sql_valid")
646+
647+
workflow.add_conditional_edges(
648+
"check_sql_valid",
649+
t2s_sql_valid_router,
650+
{
651+
"YES": "show_answer",
652+
"NO": "check_max_tries"
653+
}
654+
)
655+
656+
workflow.add_edge("show_answer", END)
657+
604658
workflow.add_edge("correct_query", "transform_into_sql")
605659
workflow.add_edge("info_query_not_relevant", END)
606660
workflow.add_edge("info_unable_create_sql", END)
607661

608-
## to be changed
609-
# workflow.add_edge("execute_query", "show_answer")
610-
workflow.add_edge("show_answer", END)
662+
611663

612664
t2s_process = workflow.compile()
613665

0 commit comments

Comments
 (0)