Skip to content

Commit d93b315

Browse files
committed
Added an array to store past conversations.
1 parent 694d39c commit d93b315

File tree

1 file changed

+40
-31
lines changed

1 file changed

+40
-31
lines changed

app/rag_system.py

Lines changed: 40 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from sentence_transformers import SentenceTransformer
55
import numpy as np
66
from sklearn.metrics.pairwise import cosine_similarity
7-
import re
87

98
# Ensure you have set the OPENAI_API_KEY in your environment variables
109
openai.api_key = os.getenv("OPENAI_API_KEY")
@@ -15,6 +14,7 @@ def __init__(self, knowledge_base_path='knowledge_base.json'):
1514
self.knowledge_base = self.load_knowledge_base()
1615
self.model = SentenceTransformer('all-MiniLM-L6-v2')
1716
self.doc_embeddings = self.embed_knowledge_base()
17+
self.conversation_history = [] # To store the conversation history
1818

1919
def load_knowledge_base(self):
2020
"""
@@ -38,7 +38,9 @@ def normalize_query(self, query):
3838
return query.lower().strip()
3939

4040
def retrieve(self, query, similarity_threshold=0.7, high_match_threshold=0.8, max_docs=5):
41-
# Normalize query
41+
"""
42+
Retrieve relevant documents from the knowledge base using cosine similarity.
43+
"""
4244
normalized_query = self.normalize_query(query)
4345
print(f"Retrieving context for query: '{normalized_query}'")
4446

@@ -52,24 +54,18 @@ def retrieve(self, query, similarity_threshold=0.7, high_match_threshold=0.8, ma
5254
relevance_scores = []
5355

5456
for i, doc in enumerate(self.knowledge_base):
55-
# Calculate about and text similarities separately
5657
about_similarity = cosine_similarity(query_embedding, self.model.encode([doc["about"]]))[0][0]
57-
text_similarity = similarities[i] # Already calculated
58+
text_similarity = similarities[i]
5859

59-
# Give more weight to text similarity
6060
combined_score = (0.3 * about_similarity) + (0.7 * text_similarity)
61-
62-
# If either about or text similarity is above the high match threshold, prioritize it
6361
if about_similarity >= high_match_threshold or text_similarity >= high_match_threshold:
6462
combined_score = max(about_similarity, text_similarity)
6563

6664
relevance_scores.append((i, combined_score))
6765

68-
# Sort by combined score in descending order
6966
sorted_indices = sorted(relevance_scores, key=lambda x: x[1], reverse=True)
7067
top_indices = [i for i, score in sorted_indices[:max_docs] if score >= similarity_threshold]
7168

72-
# Retrieve the most relevant documents, including both 'about' and 'text' fields
7369
retrieved_docs = [f'{self.knowledge_base[i]["about"]}. {self.knowledge_base[i]["text"]}' for i in top_indices]
7470

7571
if not retrieved_docs:
@@ -82,53 +78,58 @@ def retrieve(self, query, similarity_threshold=0.7, high_match_threshold=0.8, ma
8278
return context
8379

8480
def generate_response(self, query, context):
85-
# Normalize query
86-
normalized_query = self.normalize_query(query)
87-
print(f"Generating response for query: '{normalized_query}'")
81+
"""
82+
Generate a response from OpenAI using conversation history and knowledge base context.
83+
"""
84+
# Add user query to conversation history
85+
self.conversation_history.append({"role": "user", "content": query})
8886

89-
try:
90-
prompt = (
87+
# Include conversation history in the prompt
88+
messages = [{"role": "system", "content": "You are a helpful assistant."}]
89+
messages.extend(self.conversation_history) # Include past conversation history
90+
91+
# Prepare the context from the knowledge base
92+
system_message = {
93+
"role": "system",
94+
"content": (
9195
"You are a dedicated assistant designed to provide positive answers about Defang. "
92-
"When the user says 'you', 'your', or any pronoun, interpret it as referring to Defang with context of Defang also. "
96+
"When the user says 'you', 'your', or any pronoun, interpret it as referring to Defang with context of Defang. "
9397
"If the user's question involves comparisons with or references to other services, you may use external knowledge. "
9498
"However, if the question is strictly about Defang, you must ignore all external knowledge and only utilize the given context. "
95-
"When generating the answer, please put the answer first and the justification later. "
96-
"Any mentions of BYOD means BRING YOUR OWN DOMAIN and NOT BRING YOUR OWN DEVICE."
97-
"Your objective is to remain strictly within the confines of the given context unless comparisons to other services are explicitly mentioned. "
98-
"Although this rarely happens, if the prompt is not related to defang reply with prompt out of scope. If the prompt contains the word `defang` proceed with answering"
99-
"\n\nContext:\n" + context + "\n\n"
100-
"User Question: " + query + "\n\n"
101-
"Answer:"
99+
"Context: " + context
102100
)
101+
}
102+
103+
messages.append(system_message)
103104

105+
try:
104106
response = openai.ChatCompletion.create(
105107
model="gpt-4-turbo",
106-
messages=[
107-
{"role": "system", "content": "You are a helpful assistant."},
108-
{"role": "system", "content": prompt},
109-
{"role": "user", "content": normalized_query}
110-
],
111-
temperature=0.05,
108+
messages=messages,
109+
temperature=0.5,
112110
max_tokens=2048,
113111
top_p=1,
114112
frequency_penalty=0,
115113
presence_penalty=0
116114
)
117115

118-
# Print the response generated by the model
119116
generated_response = response['choices'][0]['message']['content'].strip()
120117

121-
print("Generated Response:\n", generated_response)
118+
# Add the bot's response to the conversation history
119+
self.conversation_history.append({"role": "assistant", "content": generated_response})
122120

121+
print("Generated Response:\n", generated_response)
123122
return generated_response
124123

125124
except openai.error.OpenAIError as e:
126125
print(f"Error generating response from OpenAI: {e}")
127126
return "An error occurred while generating the response."
128127

129128
def answer_query(self, query):
129+
"""
130+
Answer the user query, leveraging knowledge base context and conversation history.
131+
"""
130132
try:
131-
# Normalize query before use
132133
normalized_query = self.normalize_query(query)
133134
context = self.retrieve(normalized_query)
134135
response = self.generate_response(normalized_query, context)
@@ -137,6 +138,14 @@ def answer_query(self, query):
137138
print(f"Error in answer_query: {e}")
138139
return "An error occurred while generating the response."
139140

141+
def clear_conversation_history(self):
142+
"""
143+
Clear the stored conversation history.
144+
This can be called to reset the conversation for a new session.
145+
"""
146+
self.conversation_history = []
147+
print("Conversation history cleared.")
148+
140149
def rebuild_embeddings(self):
141150
"""
142151
Rebuild the embeddings for the knowledge base. This should be called whenever the knowledge base is updated.

0 commit comments

Comments (0)