-
Notifications
You must be signed in to change notification settings - Fork 31
Expand file tree
/
Copy pathchatbot.py
More file actions
125 lines (101 loc) · 4.36 KB
/
chatbot.py
File metadata and controls
125 lines (101 loc) · 4.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import sys
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv
# --- Module configuration ---
TOP_K = 3  # Number of similar documents to retrieve per query (configurable)
MODEL_NAME = "gpt-4"  # OpenAI chat model used for answer generation
# Load environment variables (expects OPENAI_API_KEY in a local .env file)
load_dotenv()
def load_vectorstore(index_path="faiss_index"):
    """Load a persisted FAISS index from *index_path*.

    Returns the vectorstore on success, or None (after printing a
    diagnostic) when the index is missing or fails to deserialize.
    """
    # Missing index is an expected first-run condition: tell the user
    # how to create one instead of raising.
    if not os.path.exists(index_path):
        print(f"Error: Index not found at '{index_path}'")
        print("Please run indexer.py first to create the index!")
        return None
    try:
        # allow_dangerous_deserialization is required because FAISS
        # persistence uses pickle; the index is assumed locally trusted.
        return FAISS.load_local(
            index_path,
            OpenAIEmbeddings(),
            allow_dangerous_deserialization=True,
        )
    except Exception as exc:
        # Best-effort CLI boundary: report and let the caller bail out.
        print(f"Error loading index: {exc}")
        return None
def get_similar_documents(vectorstore, query, k=TOP_K):
    """Return the top *k* (document, distance-score) pairs for *query*."""
    return vectorstore.similarity_search_with_score(query, k=k)
def generate_answer(llm, query, context_docs):
    """Generate an answer to *query* grounded in *context_docs*.

    Args:
        llm: A chat model exposing ``invoke(messages)`` (e.g. ChatOpenAI).
        query: The user's question as a plain string.
        context_docs: Retrieved documents; their ``page_content`` is
            concatenated into the prompt context.

    Returns:
        The model's answer text (``response.content``).
    """
    # Build context from documents
    context = "\n\n".join(doc.page_content for doc in context_docs)
    # BUG FIX: context/query must be passed as template variables, not
    # f-string-interpolated into the template text. Otherwise any literal
    # '{' or '}' inside a retrieved PDF chunk (JSON, code, LaTeX, ...) is
    # treated as a template placeholder and format_messages() raises.
    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant. Answer the question based on the provided context from PDF documents. If the context doesn't contain relevant information, say 'I don't know.' Keep your response concise and within approximately 3000 characters to ensure complete answers without truncation."),
        ("user", "Context:\n{context}\n\nQuestion: {query}\n\nAnswer:")
    ])
    # Substitute the variables safely, then ask the model.
    messages = prompt.format_messages(context=context, query=query)
    response = llm.invoke(messages)
    return response.content
def main():
    """Simple chatbot: query → retrieve → show scores → generate answer"""
    banner = "=" * 60
    print(banner)
    print("PDF RAG Chatbot")
    print(banner)

    # The key is needed for both embedding queries and chat completions.
    if not os.getenv("OPENAI_API_KEY"):
        print("Error: OPENAI_API_KEY not found!")
        print("Create a .env file with: OPENAI_API_KEY=your_key")
        sys.exit(1)

    # Optional first CLI argument overrides the default index location.
    index_path = "faiss_index" if len(sys.argv) < 2 else sys.argv[1]

    print(f"Loading index from '{index_path}'...")
    vectorstore = load_vectorstore(index_path)
    if vectorstore is None:
        sys.exit(1)

    print(f"Initializing {MODEL_NAME}...")
    llm = ChatOpenAI(model_name=MODEL_NAME, temperature=0.1, max_tokens=4096)

    print(f"\n✓ Ready! Using {MODEL_NAME}, retrieving top {TOP_K} matches")
    print("Type 'exit' to quit")
    print("-" * 60)

    # Interactive REPL: retrieve, show scored matches, then answer.
    while True:
        print("\n")
        user_query = input("You: ").strip()
        if not user_query:
            continue
        if user_query.lower() in ('exit', 'quit'):
            print("Goodbye!")
            break
        try:
            # Step 1: nearest chunks with their distance scores.
            matches = get_similar_documents(vectorstore, user_query, k=TOP_K)

            # Step 2: show each match with a quality marker and snippet.
            print(f"\n🔍 Top {TOP_K} Similar Documents:")
            for rank, (doc, score) in enumerate(matches, start=1):
                origin = doc.metadata.get("source", "Unknown")
                # Lower distance = closer match; bucket into three tiers.
                if score < 0.4:
                    marker = "✅"
                elif score < 0.5:
                    marker = "⚠️"
                else:
                    marker = "❌"
                body = doc.page_content
                snippet = body[:200].replace('\n', ' ')
                if len(body) > 200:
                    snippet += "..."
                print(f"\n  {rank}. {origin} - Score: {score:.3f} {marker}")
                print(f"      \"{snippet}\"")

            # Step 3: hand only the document texts (not scores) to the LLM.
            context_docs = [doc for doc, _ in matches]
            print("\nBot: ", end="", flush=True)
            print(generate_answer(llm, user_query, context_docs))
        except Exception as err:
            # Keep the REPL alive on any per-query failure.
            print(f"\nError: {err}")
            print("Please try again.")
# Entry point: start the interactive chat loop only when run as a script.
if __name__ == "__main__":
    main()