Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .env.copy
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ DB_USER=postgres
DB_PASSWORD=postgres

OPENAI_API_KEY=your_actual_openai_api_key_here

QDRANT_URL=http://localhost:6333
QDRANT_COLLECTION=northwind_rag
GROQ_API_KEY=
5 changes: 4 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@
{"label": "Schema-Based Querying", "value": "schema"},
{"label": "RAG (Retrieval-Augmented Generation)", "value": "rag"},
{"label": "Visualize", "value": "visualize"},
{"label": "Multi-Table Join", "value": "multitablejoin"}
{"label": "Multi-Table Join", "value": "multitablejoin"},
{"label": "Simple RAG", "value": "simple_rag"}
],
value="schema",
size="sm",
Expand Down Expand Up @@ -360,6 +361,8 @@ def toggle_modal(n1, n2, n3, is_open):
return not is_open
return is_open



# Callback for chat functionality
@app.callback(
[Output("chat-messages", "children"),
Expand Down
45 changes: 45 additions & 0 deletions database/query_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,49 @@ def validate_query(self, sql_query: str, context: Dict[str, Any]) -> Tuple[bool,
logger.error(f"Security validation error: {e}")
return False, "Blocked by guardrails due to internal validation error", sql_query

class SimpleRAGQueryEngine(QueryEngine):
"""Simple RAG-based query generation using Qdrant and Groq"""

def __init__(self, qdrant_config: Dict, groq_api_key: str):
from simple_rag.rag_logic import SQLAgent
self.sql_agent = SQLAgent(qdrant_config, groq_api_key)
logger.info("Initialized SimpleRAGQueryEngine")

def get_name(self) -> str:
return "Simple RAG Querying"

def generate_query(self, user_query: str, context: Dict[str, Any]) -> Tuple[bool, str]:
"""Generate SQL query using RAG logic"""
try:
result = self.sql_agent.process_query(user_query)
if result['success']:
return True, result['sql_query']
else:
return False, result['error']
except Exception as e:
error_msg = f"Failed to generate query: {str(e)}"
logger.error(error_msg)
return False, error_msg

def execute_query(self, sql_query: str) -> Tuple[bool, Any]:
"""Execute the generated SQL query"""
try:
# _execute_sql_query returns (results, columns) tuple
results, columns = self.sql_agent._execute_sql_query(sql_query)

# Convert to DataFrame for consistency with other engines
import pandas as pd
if results and columns:
df = pd.DataFrame(results, columns=columns)
return True, df
else:
return True, pd.DataFrame() # Empty DataFrame if no results

except Exception as e:
error_msg = f"Failed to execute query: {str(e)}"
logger.error(error_msg)
return False, error_msg

class QueryEngineFactory:
"""Factory for creating query engines"""

Expand All @@ -560,6 +603,8 @@ def create_query_engine(engine_type: str, config: Dict[str, Any]) -> QueryEngine
if not api_key:
raise ValueError("Groq API key required for visualization")
return VisualizationQueryEngine(api_key)
elif engine_type == "simple_rag":
return SimpleRAGQueryEngine(config.get('qdrant_config'), config.get('groq_api_key'))
else:
raise ValueError(f"Unknown query engine type: {engine_type}")

Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,6 @@ dependencies = [
"seaborn>=0.13.0",
"matplotlib>=3.8.0",
"langchain-groq>=0.3.7",
"sentence-transformers>=5.1.0",
"qdrant-client>=1.12.1",
]
47 changes: 47 additions & 0 deletions simple_rag/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Demo Platform

## Setup Instructions

1. Clone the repository:
```
git clone https://github.com/pthom/northwind_psql.git
```

2. Navigate into the cloned directory:
```
cd northwind_psql
```

3. Run Docker Compose to start the services:
```
docker compose up
```

4. Set up Qdrant using Docker:
```
docker run -p 6333:6333 -p 6334:6334 \
-v "$(pwd)/qdrant_storage:/qdrant/storage:z" \
qdrant/qdrant
```

5. Navigate to the parent directory:
```
cd ..
```


6. Set up the `.env` file with the following content:
```
QDRANT_URL=http://localhost:6333
QDRANT_COLLECTION=northwind_rag
GROQ_API_KEY=
```

7. Run the embedding insertion script (first time only):
```
python embedd_insert.py
```

## Sample Query

- **Query:** List the top 5 customers by total order value.
65 changes: 65 additions & 0 deletions simple_rag/embedd_insert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import numpy as np
from sqlalchemy import create_engine, inspect, text
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct

PG_CONN = os.getenv("PG_CONN", "postgresql://postgres:postgres@localhost:55432/northwind")
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
COLLECTION = "northwind_rag"

# 1. Connect to Postgres
engine = create_engine(PG_CONN)
insp = inspect(engine)

# 2. Build table cards
def build_table_cards(engine, sample_rows=3):
cards = []
with engine.connect() as conn:
for table in insp.get_table_names():
cols = insp.get_columns(table)
col_lines = [f"- {c['name']} ({c['type']})" for c in cols]
fks = insp.get_foreign_keys(table)
fk_lines = [f"- {fk['constrained_columns']} -> {fk['referred_table']}.{fk['referred_columns']}" for fk in fks] or ["- none"]
try:
rows = conn.execute(text(f"SELECT * FROM {table} LIMIT {sample_rows}")).fetchall()
except:
rows = []
card = f"""Table: {table}
Columns:
{chr(10).join(col_lines)}
Foreign Keys:
{chr(10).join(fk_lines)}
Row samples ({len(rows)}):
{chr(10).join([str(r) for r in rows])}
"""
cards.append({"table": table, "text": card})
return cards

cards = build_table_cards(engine)

# 3. Create embeddings
model = SentenceTransformer("all-MiniLM-L6-v2", device='cpu')
embs = model.encode([c["text"] for c in cards], convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)

# 4. Push into Qdrant
client = QdrantClient(url=QDRANT_URL)

client.recreate_collection(
collection_name=COLLECTION,
vectors_config=VectorParams(size=embs.shape[1], distance=Distance.COSINE)
)

points = [
PointStruct(
id=i,
vector=emb.tolist(),
payload={"table": c["table"], "text": c["text"], "type": "table_card"}
)
for i, (c, emb) in enumerate(zip(cards, embs))
]

client.upsert(collection_name=COLLECTION, points=points)

print(f"Uploaded {len(points)} table cards into Qdrant collection '{COLLECTION}'")
Loading