136 changes: 136 additions & 0 deletions examples/discord_bot/collect_qa_data.py
@@ -0,0 +1,136 @@
#!/usr/bin/env python3
"""
Script to collect top-level questions from a Discord channel for pgai bot evaluation.
Extracts only the initial questions from threads for benchmarking.
"""

import asyncio
import json
import os
from datetime import datetime

import discord
from dotenv import load_dotenv

load_dotenv()


class QuestionCollector(discord.Client):
def __init__(self, target_channel_id: int):
self.target_channel_id = target_channel_id
self.questions: list[str] = []
self.collection_done = False

        # message_content is a privileged intent: it must also be enabled for
        # the bot in the Discord Developer Portal.
        intents = discord.Intents.default()
        intents.message_content = True
        intents.guilds = True
super().__init__(intents=intents)

async def on_ready(self):
"""Called when the bot is ready."""
print(f"Logged in as {self.user}")
await self.collect_questions()
self.collection_done = True
await self.close()

async def collect_questions(self) -> list[str]:
"""Collect top-level questions from the Discord channel."""
print("Collecting questions...")

# Get the channel
channel = None
for guild in self.guilds:
channel = guild.get_channel(self.target_channel_id)
if channel:
break

if not channel:
print(f"Channel {self.target_channel_id} not found")
return []

print(f"Collecting from channel: {channel.name}")

if not isinstance(channel, discord.TextChannel):
print("Channel is not a text channel")
return []

# Collect only messages that create threads (top-level questions)
try:
async for message in channel.history(limit=None):
# Only collect messages that:
# 1. Are not from bots
# 2. Have content
# 3. Have a thread (meaning they started a conversation)
if (
not message.author.bot
and message.content.strip()
and message.thread is not None
):
self.questions.append(message.content.strip())
print(f"Found question: {message.content[:50]}...")
except Exception as e:
print(f"Error fetching channel messages: {e}")

print(f"Collected {len(self.questions)} questions")
return self.questions


async def main():
"""Main function to run the question collection."""
bot_token = os.environ.get("DISCORD_BOT_TOKEN")
if not bot_token:
print("Error: DISCORD_BOT_TOKEN not found in environment")
return

    # Hard-coded ID of the target channel (the same channel referenced by
    # <#1331981876319223879> mentions in the collected questions).
    channel_id = 1331981876319223879

collector = QuestionCollector(channel_id)

# Start the bot in a task
bot_task = asyncio.create_task(collector.start(bot_token))

# Wait for collection to complete with timeout
    start_time = asyncio.get_running_loop().time()
timeout = 30 # 30 seconds timeout

print("Waiting for collection to complete...")
while not collector.collection_done:
        elapsed = asyncio.get_running_loop().time() - start_time
if elapsed > timeout:
print(f"Timeout after {timeout} seconds")
collector.collection_done = True
break
await asyncio.sleep(1)

# Cancel the bot task if it's still running
if not bot_task.done():
bot_task.cancel()
try:
await bot_task
except asyncio.CancelledError:
pass

questions = collector.questions

if questions:
# Save to JSON file
output_file = (
f"discord_questions_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
)
with open(output_file, "w", encoding="utf-8") as f:
json.dump(questions, f, indent=2, ensure_ascii=False)

print(f"Questions saved to {output_file}")
print(f"Total questions collected: {len(questions)}")

# Show first few questions
print("\nFirst few questions:")
for i, question in enumerate(questions[:5]):
preview = question[:100] + "..." if len(question) > 100 else question
print(f"{i+1}. {preview}")
else:
print("No questions collected")


if __name__ == "__main__":
asyncio.run(main())
35 changes: 35 additions & 0 deletions examples/discord_bot/discord_questions_20250619_151250.json
@@ -0,0 +1,35 @@
[
"Can you run pgai without docker on windows?",
"I was wondering for the following information. \n\nDoes the PDF parser also include images? \n\nDoes the embedding take these images into account? \n\nHow to enable this?",
"how do i add a vectorizer to an existing table",
"<#1331981876319223879> how to optimize pgvectorscale on a postgres17 timescaledb with a database of 50M+ rows with embeddings of 1024. I already use matryoshka of 512",
"<#1331981876319223879> how do i delete a vectorizer",
"<#1331981876319223879> When using the parsing options like docling do I have the posibility to select the output format after the parsing? Or is the markdown output format by default for everything?",
"<#1331981876319223879> When using Docling, are the models used to parse documents stored locally in a self-hosted Postgres database?",
"In pg_ai, is it possible to explicitly provide embedding (eg in unit tests), so it's not automatically generated?",
"fix: ```2025-06-05 12:48:28 [debug ] | docling.exceptions.ConversionError: File format not allowed: uploads/1/AmazonQDev.pdf ``` in worker",
"ERROR: Could not find a version that satisfies the requirement pgai (from versions: none) How can I solve this error when I install pgai by this command? pip install pgai",
"IN my cloudberry environment, postgresql version is 14. How can I install pgvectorscale when requirement is postgresql version 16?",
"what is your experience in chunking strategies? What are some good working strategies and what strategies work less good?",
"generate_rag_response\n\nHow to implement this function",
"I have created a storage bucket for files right now, how to create a well function vectorizer for this",
"im getting this error: \n\n[42883] ERROR: function ai. openai_embed(text, text, api_key_name => text, dimensions => integer, openai_user => text) does not exist Hint: No function matches the given name and argument types. You might need to add explicit type casts. Where: PL/ pgSQL function ai. vectorizer_embed(jsonb,text,text) line 7 at assignment SQL function \"vectorizer_embed\" statement 1\n\n\nSELECT\n chunk,\n embedding <=> ai.vectorizer_embed(2, 'fraude', 'string') as distance\nFROM risk_embeddings\nORDER BY distance;\n\nFor this",
"Hi",
"How does it work with different types of data that I should parse. Lets say I have one huge document table, which can either raw text, a file document (docx, pdf etc), or both. How does the embedding work for that?",
"How does the pgai s3 integration work?",
"```\nn8n-db=# SELECT ai.create_vectorizer(\n 'blog'::regclass,\n loading => ai.loading_column('contents'),\n embedding => ai.embedding_ollama('nomic-embed-text', 768),\n destination => ai.destination_table('blog_contents_embeddings')\n);\nERROR: function ai.loading_column(unknown) does not exist\nLINE 3: loading => ai.loading_column('contents'),\n ^\nHINT: No function matches the given name and argument types. You might need to add explicit type casts.\n```\nI am getting this error even if the function is there, when i run this query\n`SELECT routine_name, routine_type FROM information_schema.routines WHERE routine_schema = 'ai'`",
"which version of pgai has the text-to-sql feature included ?",
"can i use any hugging face model in pgai?",
"How to calculate size of index in pgvector scale",
"hello please share the link to download the library pgai vectorizer",
"Is there a way to use PGAI with Gemini?",
"Is it normal for pgvectorscale streamingdisk ann to take 3 hours to build the index for 12 million rows with 75 dimensions vector?",
"Is there a way to vectorize multiple columns of a table? for exampe, concating some columns into a single string.",
"Hello, is it possible to store chunks in the source table instead of the full documents and disable chunking in the vectorizer configuration?",
"<@1326487316499529760> Installed pgai, am I able to make responses on endpoint?\n",
"Installed pgai, am I able to make responses on endpoint?",
"Tell me about yourself <@&1249549447000490075>",
"How do I create a vectorizer with voyage ai embeddings?",
"How can I use pgai with sqlalchemy?",
"Hello World?"
]
193 changes: 193 additions & 0 deletions examples/discord_bot/eval.py
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
"""
Evaluation script for pgai Discord bot.
Tests the bot's RAG pipeline against a list of questions and uses an LLM judge to evaluate responses.
"""

import asyncio
import json
import logging
import sys

from dotenv import load_dotenv

# Import the refactored functions from main.py
from pgai_discord_bot.main import (
generate_rag_response,
openai_client,
retrieve_relevant_documents,
)

load_dotenv()

# Set up logging
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


async def evaluate_response_with_llm(
question: str, bot_response: str, relevant_docs: str
) -> dict:
"""Use an LLM judge to evaluate if the bot response adequately answers the question."""
judge_prompt = f"""
You are an expert evaluator for a technical documentation chatbot. Your task is to evaluate whether a bot response adequately answers a user's question about pgai (a PostgreSQL AI extension).

USER QUESTION: {question}

BOT RESPONSE: {bot_response}

RELEVANT DOCUMENTATION USED: {relevant_docs}

Please evaluate the response on these criteria:
1. ACCURACY: Is the information provided factually correct based on the documentation?
2. COMPLETENESS: Does the response adequately address the user's question?
3. CLARITY: Is the response clear and easy to understand?
4. RELEVANCE: Is the response relevant to the question asked?
5. HELPFULNESS: Would this response help the user solve their problem?

Provide your evaluation in the following JSON format:
{{
"overall_score": <score from 1-10>,
"accuracy": <score from 1-10>,
"completeness": <score from 1-10>,
"clarity": <score from 1-10>,
"relevance": <score from 1-10>,
"helpfulness": <score from 1-10>,
"reasoning": "<brief explanation of your evaluation>",
"issues": "<any specific issues you identified>",
"strengths": "<any specific strengths you identified>"
}}
"""

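    # temperature=0.1 keeps the judge's scoring close to deterministic across runs.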
chat_completion = await openai_client.chat.completions.create(
messages=[{"content": judge_prompt, "role": "user"}],
model="gpt-4o",
temperature=0.1,
)

try:
# Extract JSON from the response
response_content = chat_completion.choices[0].message.content or ""
# Find JSON in the response (in case there's extra text)
start_idx = response_content.find("{")
end_idx = response_content.rfind("}") + 1
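        # If no JSON object is present, the slice below yields an invalid
        # string and json.loads() raises, which the except clause handles.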
json_str = response_content[start_idx:end_idx]
return json.loads(json_str)
except (json.JSONDecodeError, ValueError) as e:
logger.error(f"Failed to parse LLM judge response: {e}")
return {
"overall_score": 0,
"accuracy": 0,
"completeness": 0,
"clarity": 0,
"relevance": 0,
"helpfulness": 0,
"reasoning": "Failed to parse evaluation",
"issues": f"JSON parsing error: {e}",
"strengths": "N/A",
}


async def run_evaluation(questions_file: str, max_questions: int | None = None) -> None:
"""Run the complete evaluation pipeline."""
logger.info("Starting evaluation...")

# Load questions
with open(questions_file, encoding="utf-8") as f:
questions = json.load(f)

if max_questions:
questions = questions[:max_questions]
logger.info(f"Running evaluation on first {len(questions)} questions")
else:
logger.info(f"Loaded {len(questions)} questions")

results = []

for i, question in enumerate(questions, 1):
logger.info(f"Processing question {i}/{len(questions)}: {question[:100]}...")

try:
# Get relevant docs
relevant_docs = await retrieve_relevant_documents(question)

# Generate bot response using the refactored function
bot_response = await generate_rag_response(question)

# Evaluate with LLM judge
evaluation = await evaluate_response_with_llm(
question, bot_response, relevant_docs
)

result = {
"question_id": i,
"question": question,
"bot_response": bot_response,
"relevant_docs": relevant_docs,
"evaluation": evaluation,
}

results.append(result)

logger.info(
f"Question {i} - Overall Score: {evaluation.get('overall_score', 'N/A')}"
)

except Exception as e:
logger.error(f"Error processing question {i}: {e}")
results.append(
{
"question_id": i,
"question": question,
"bot_response": f"ERROR: {str(e)}",
"relevant_docs": "",
"evaluation": {
"overall_score": 0,
"reasoning": f"Processing error: {e}",
"issues": str(e),
"accuracy": 0,
"completeness": 0,
"clarity": 0,
"relevance": 0,
"helpfulness": 0,
"strengths": "N/A",
},
}
)

# Save results
output_file = "eval_results.json"
with open(output_file, "w", encoding="utf-8") as f:
json.dump(results, f, indent=2, ensure_ascii=False)

# Print summary
scores = [r["evaluation"].get("overall_score", 0) for r in results]
avg_score = sum(scores) / len(scores) if scores else 0

print("\n=== EVALUATION SUMMARY ===")
print(f"Total questions: {len(questions)}")
print(f"Average overall score: {avg_score:.2f}/10")
print("Score distribution:")
print(f" - Excellent (9-10): {sum(1 for s in scores if s >= 9)}")
print(f" - Good (7-8): {sum(1 for s in scores if 7 <= s < 9)}")
print(f" - Fair (5-6): {sum(1 for s in scores if 5 <= s < 7)}")
print(f" - Poor (1-4): {sum(1 for s in scores if 1 <= s < 5)}")
print(f" - Failed (0): {sum(1 for s in scores if s == 0)}")
print(f"\nDetailed results saved to: {output_file}")


if __name__ == "__main__":
    questions_file = (
        sys.argv[1] if len(sys.argv) > 1 else "discord_questions_20250619_151250.json"
    )
    max_questions = None
    if len(sys.argv) > 2:
        try:
            max_questions = int(sys.argv[2])
        except ValueError:
            print("Usage: python eval.py <questions_file> [max_questions]")
            sys.exit(1)
asyncio.run(run_evaluation(questions_file, max_questions))
@@ -23,7 +23,7 @@


def upgrade() -> None:
-    op.execute("CREATE EXTENSION IF NOT EXISTS ai CASCADE;")
+    op.execute("CREATE EXTENSION IF NOT EXISTS ai VERSION '0.8.0' CASCADE;")
op.create_vectorizer(
source="documents",
embedding=EmbeddingOpenaiConfig(model="text-embedding-3-small", dimensions=768),