44## Version 0.1 DirkB : Initial version ##
55#####################################################
66
7- import os
8- import pyexasol
9- #from mypy.state import state
10- from pyexasol import ExaError
11-
12- import sys
7+ ########################
8+ ## Required Libraries ##
9+ ########################
1310
11+ import chromadb
12+ from chromadb .config import Settings
1413from cryptography .fernet import Fernet
14+ from datetime import datetime
1515from dotenv import load_dotenv
16+ from exasol .ai .mcp .server .load_prompt import load_prompt
17+ from exasol .ai .mcp .server .server_settings import ExaDbResult
18+ from langchain_core .prompts import ChatPromptTemplate
19+ from langchain_openai import ChatOpenAI
1620from langgraph .graph import StateGraph , START , END
21+ import os
1722from pydantic import BaseModel , Field
18- from typing_extensions import TypedDict
19-
20- from langchain_openai import ChatOpenAI
21- from langchain_core .prompts import ChatPromptTemplate
22-
23- from sqlglot import (
24- exp ,
25- parse_one ,
26- )
23+ import pyexasol
24+ from pyexasol import ExaError
25+ from sqlglot import exp , parse_one
2726from sqlglot .errors import ParseError
28-
29- from exasol .ai .mcp .server .load_prompt import load_prompt
30- from exasol .ai .mcp .server .server_settings import ExaDbResult
31-
32-
33-
27+ import sys
28+ from typing_extensions import TypedDict
3429
3530
3631############################################################################
@@ -63,7 +58,7 @@ def get_environment() -> dict:
6358 load_dotenv ()
6459
6560 secret_key = os .getenv ("MCP_SERVER_EXASOL_SECRET_KEY" )
66- assert secret_key is not None , "Please set SECRET_KEY environment variable"
61+ assert secret_key is not None , "Please set 'MCP_SERVER_EXASOL_SECRET_KEY' environment variable"
6762 fernet = Fernet (secret_key )
6863 stored_password = os .getenv ("MCP_EXASOL_DATABASE_PASSWORD" )
6964 db_password = fernet .decrypt (stored_password ).decode ()
@@ -76,30 +71,30 @@ def get_environment() -> dict:
7671 "llm_server_api_token" : os .getenv ("MCP_OPENAI_SERVER_API_KEY" ),
7772 "llm_server_model_check" : os .getenv ("MCP_OPENAI_SERVER_MODEL_NAME" ),
7873 "llm_server_sql_transform" : os .getenv ("MCP_OPENAI_SERVER_MODEL_NAME" ),
74+ "vectordb_persistent_storage" : os .getenv ("MCP_VECTORDB_FILE" ),
75+ "vectordb_similarity_distance" : os .getenv ("MCP_VECTORDB_SIMILARITY_DISTANCE" ),
7976 }
8077
8178 return env
8279
8380
84-
85-
8681#######################################################
8782## Working status of Text2SQL transformation process ##
8883#######################################################
8984
class GraphState(TypedDict):
    """Working state passed between the steps of the Text2SQL process.

    Each step of the transformation graph receives this dictionary, reads
    the fields it needs, and writes its own results back into it.
    """

    question: str         # The natural language question
    db_schema: str        # The database schema to be used
    sql_statement: str    # The generated SQL statement
    query_num_rows: int   # The number of rows returned by the SQL statement
    query_result: str     # The result of the generated SQL statement, as a string
    display_result: str   # The query result transformed into a visual version
    num_of_attempts: int  # The number of attempts to generate a valid SQL statement
    is_allowed: str       # Whether the generated SQL statement is allowed (currently READ-ONLY statements)
    is_relevant: str      # Whether the question fits the underlying database schema
    sql_is_valid: str     # Whether the Exasol database accepted the SQL statement ("YES" / "NO")
    sql_error: str        # The SQL error returned by the Exasol database, or "None"
    info: str             # Additional INFO field
10398
10499
105100############################################################
@@ -226,6 +221,28 @@ def t2s_human_language_to_sql(state: GraphState):
226221
227222 system_prompt = load_prompt (db_schema = db_schema , schema = schema )
228223
224+ ##
225+ ## Check VectorDB for a similar question and SQL Statement
226+ ##
227+
228+ try :
229+ vectordb_client = chromadb .PersistentClient (path = env ['vectordb_persistent_storage' ])
230+ sql_collection = vectordb_client .get_or_create_collection (name = "Questions_SQL_History" )
231+ tmp = sql_collection .query (query_texts = state ['question' ], n_results = 1 , include = ["distances" , "documents" , "metadatas" ])
232+
233+ print (f"VectorDB Result in T2S ::: { tmp } " , file = sys .stderr )
234+ print (f"Vector-DB-Result-Distance in T2S ::: { tmp ["distances" ][0 ][0 ]} " , file = sys .stderr )
235+
236+ if float (tmp ["distances" ][0 ][0 ]) <= float (env ['vectordb_similarity_distance' ]):
237+ system_prompt += f"""
238+ For a similar natural language question you have created the following SQL statement:
239+
240+ { tmp ['metadatas' ][0 ][0 ]['sql' ]}
241+
242+ """
243+ except Exception as e :
244+ print (f"ChromaDB - Error: { e } " , file = sys .stderr )
245+
229246 print (f"Prompt: { system_prompt } " , file = sys .stderr )
230247
231248
@@ -306,7 +323,7 @@ def t2s_check_sql_router(state: GraphState):
306323
307324def t2s_execute_query (state : GraphState ):
308325
309- print (f"#### Beginning of SQL Execution ::: state['sql_statement']" , file = sys .stderr )
326+ print (f"#### Beginning of SQL Execution -01 ::: { state ['sql_statement' ]} " , file = sys .stderr )
310327
311328 env = get_environment ()
312329 try :
@@ -315,6 +332,7 @@ def t2s_execute_query(state: GraphState):
315332 #rows = c.export_to_pandas(state['sql_statement'])
316333
317334 state ['query_result' ] = str (ExaDbResult (rows ))
335+ state ['query_num_rows' ] = c .last_statement ().rowcount ()
318336
319337 except ExaError as e :
320338 state ['sql_is_valid' ] = "NO"
@@ -323,7 +341,28 @@ def t2s_execute_query(state: GraphState):
323341 state ['sql_is_valid' ] = "YES"
324342 state ['sql_error' ] = "None"
325343
326- print ("#### End of SQL Execution" , file = sys .stderr )
344+ ## Store the generated SQL statement and the natural language question into a VectorDB
345+ ## We will use it for similarity search and may add this query to the prompt for future
346+ ## natural language questions
347+
348+ if state ['query_num_rows' ] > 0 :
349+ print (f"Storing into VectorDB" , file = sys .stderr )
350+ load_dotenv ()
351+ vectordb_client = chromadb .PersistentClient (path = env ['vectordb_persistent_storage' ])
352+ sql_collection = vectordb_client .get_or_create_collection (name = "Questions_SQL_History" )
353+
354+ new_idx = sql_collection .count () + 1
355+ sql_collection .add (
356+ documents = [state ['question' ]],
357+ metadatas = [{"sql" : state ['sql_statement' ],
358+ "execution_date" : str (datetime .now ()),
359+ "db_schema" : state ['db_schema' ],
360+ "user" : env ['db_user' ].lower (),
361+ "origin" : "text-to-sql" }],
362+ ids = [f"{ new_idx } " ]
363+ )
364+
365+ print ("#### End of SQL Execution-02" , file = sys .stderr )
327366
328367 return state
329368
0 commit comments