Commit eff2433

feat: make-rag-optional-but-default
1 parent 86b6dad commit eff2433

12 files changed: +252 −208 lines

app/backend/approaches/chatapproach.py

Lines changed: 11 additions & 4 deletions
@@ -45,17 +45,24 @@ def system_message_chat_conversation(self) -> str:
     async def run_until_final_call(self, messages, overrides, auth_claims, should_stream) -> tuple:
         pass
 
-    def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str) -> str:
+    def get_system_prompt(self, override_prompt: Optional[str], follow_up_questions_prompt: str, sources_reference_content: str = "") -> str:
         if override_prompt is None:
             return self.system_message_chat_conversation.format(
-                injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
+                injected_prompt="",
+                follow_up_questions_prompt=follow_up_questions_prompt,
+                sources_reference_content=sources_reference_content
             )
         elif override_prompt.startswith(">>>"):
             return self.system_message_chat_conversation.format(
-                injected_prompt=override_prompt[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
+                injected_prompt=override_prompt[3:] + "\n",
+                follow_up_questions_prompt=follow_up_questions_prompt,
+                sources_reference_content=sources_reference_content
             )
         else:
-            return override_prompt.format(follow_up_questions_prompt=follow_up_questions_prompt)
+            return override_prompt.format(
+                follow_up_questions_prompt=follow_up_questions_prompt,
+                sources_reference_content=sources_reference_content
+            )
 
     def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
         response_message = chat_completion.choices[0].message
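
Note: with the widened signature, a client-supplied override prompt is now formatted with the new key as well. A minimal sketch of the contract (hypothetical template, not code from this repo): str.format ignores unused keyword arguments, so existing override prompts without the placeholder keep working unchanged, while templates that do include {sources_reference_content} receive the citation rules only when RAG is active.

    # Hypothetical client override template; {sources_reference_content} is the new slot.
    override_prompt = (
        "You are a terse assistant.\n"
        "{sources_reference_content}"
        "{follow_up_questions_prompt}"
    )
    # Mirrors the else-branch above: get_system_prompt supplies both keys.
    print(override_prompt.format(
        follow_up_questions_prompt="Suggest three follow-up questions.",
        sources_reference_content="",  # empty string when RAG is disabled
    ))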

app/backend/approaches/chatreadretrieveread.py

Lines changed: 105 additions & 95 deletions
@@ -58,7 +58,7 @@ def system_message_chat_conversation(self):
         return """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
 Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
 If the question is not in English, answer in the language used in the question.
-Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
+{sources_reference_content}
 {follow_up_questions_prompt}
 {injected_prompt}
 """
@@ -96,106 +96,89 @@ async def run_until_final_call(
         top = overrides.get("top", 3)
         minimum_search_score = overrides.get("minimum_search_score", 0.0)
         minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
+        include_category = overrides.get("include_category")
         filter = self.build_filter(overrides, auth_claims)
 
         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
             raise ValueError("The most recent message content must be a string.")
         user_query_request = "Generate search query for: " + original_user_query
 
-        tools: List[ChatCompletionToolParam] = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "search_sources",
-                    "description": "Retrieve sources from the Azure AI Search index",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "search_query": {
-                                "type": "string",
-                                "description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
-                            }
+        sources_content = []
+        extra_info = {"thoughts": [], "data_points": []}
+
+        if include_category != "__NONE__":
+            tools: List[ChatCompletionToolParam] = [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "search_sources",
+                        "description": "Retrieve sources from the Azure AI Search index",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "search_query": {
+                                    "type": "string",
+                                    "description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
+                                }
+                            },
+                            "required": ["search_query"],
                         },
-                        "required": ["search_query"],
                     },
-                },
-            }
-        ]
+                }
+            ]
 
-        # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
-        query_response_token_limit = 100
-        query_messages = build_messages(
-            model=self.chatgpt_model,
-            system_prompt=self.query_prompt_template,
-            tools=tools,
-            few_shots=self.query_prompt_few_shots,
-            past_messages=messages[:-1],
-            new_user_content=user_query_request,
-            max_tokens=self.chatgpt_token_limit - query_response_token_limit,
-            fallback_to_default=self.ALLOW_NON_GPT_MODELS,
-        )
+            # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
+            query_response_token_limit = 100
+            query_messages = build_messages(
+                model=self.chatgpt_model,
+                system_prompt=self.query_prompt_template,
+                tools=tools,
+                few_shots=self.query_prompt_few_shots,
+                past_messages=messages[:-1],
+                new_user_content=user_query_request,
+                max_tokens=self.chatgpt_token_limit - query_response_token_limit,
+                fallback_to_default=self.ALLOW_NON_GPT_MODELS,
+            )
 
-        chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
-            messages=query_messages,  # type: ignore
-            # Azure OpenAI takes the deployment name as the model name
-            model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
-            temperature=0.0,  # Minimize creativity for search query generation
-            max_tokens=query_response_token_limit,  # Setting too low risks malformed JSON, setting too high may affect performance
-            n=1,
-            tools=tools,
-            seed=seed,
-        )
+            chat_completion: ChatCompletion = await self.openai_client.chat.completions.create(
+                messages=query_messages,  # type: ignore
+                # Azure OpenAI takes the deployment name as the model name
+                model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
+                temperature=0.0,  # Minimize creativity for search query generation
+                max_tokens=query_response_token_limit,  # Setting too low risks malformed JSON, setting too high may affect performance
+                n=1,
+                tools=tools,
+                seed=seed,
+            )
 
-        query_text = self.get_search_query(chat_completion, original_user_query)
-
-        # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
-
-        # If retrieval mode includes vectors, compute an embedding for the query
-        vectors: list[VectorQuery] = []
-        if use_vector_search:
-            vectors.append(await self.compute_text_embedding(query_text))
-
-        results = await self.search(
-            top,
-            query_text,
-            filter,
-            vectors,
-            use_text_search,
-            use_vector_search,
-            use_semantic_ranker,
-            use_semantic_captions,
-            minimum_search_score,
-            minimum_reranker_score,
-        )
-
-        sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
-        content = "\n".join(sources_content)
+            query_text = self.get_search_query(chat_completion, original_user_query)
 
-        # STEP 3: Generate a contextual and content specific answer using the search results and chat history
+            # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
 
-        # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
-        system_message = self.get_system_prompt(
-            overrides.get("prompt_template"),
-            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
-        )
+            # If retrieval mode includes vectors, compute an embedding for the query
+            vectors: list[VectorQuery] = []
+            if use_vector_search:
+                vectors.append(await self.compute_text_embedding(query_text))
 
-        response_token_limit = 1024
-        messages = build_messages(
-            model=self.chatgpt_model,
-            system_prompt=system_message,
-            past_messages=messages[:-1],
-            # Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
-            new_user_content=original_user_query + "\n\nSources:\n" + content,
-            max_tokens=self.chatgpt_token_limit - response_token_limit,
-            fallback_to_default=self.ALLOW_NON_GPT_MODELS,
-        )
+            results = await self.search(
+                top,
+                query_text,
+                filter,
+                vectors,
+                use_text_search,
+                use_vector_search,
+                use_semantic_ranker,
+                use_semantic_captions,
+                minimum_search_score,
+                minimum_reranker_score,
+            )
 
-        data_points = {"text": sources_content}
+            sources_content = self.get_sources_content(results, use_semantic_captions, use_image_citation=False)
+            if sources_content:
+                extra_info["data_points"] = {"text": sources_content}
 
-        extra_info = {
-            "data_points": data_points,
-            "thoughts": [
+            extra_info["thoughts"].extend([
                 ThoughtStep(
                     "Prompt to generate search query",
                     query_messages,
@@ -221,20 +204,47 @@ async def run_until_final_call(
                     "Search results",
                     [result.serialize_for_results() for result in results],
                 ),
-                ThoughtStep(
-                    "Prompt to generate answer",
-                    messages,
-                    (
-                        {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
-                        if self.chatgpt_deployment
-                        else {"model": self.chatgpt_model}
-                    ),
+            ])
+
+        # STEP 3: Generate a contextual and content specific answer
+
+        # Additional prompt injected into the master prompt if RAG is enabled
+        sources_reference_content = """
+Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
+""" if include_category != "__NONE__" else ""
+
+        # Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
+        system_message = self.get_system_prompt(
+            overrides.get("prompt_template"),
+            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else "",
+            sources_reference_content=sources_reference_content
+        )
+
+        response_token_limit = 1024
+        messages = build_messages(
+            model=self.chatgpt_model,
+            system_prompt=system_message,
+            past_messages=messages[:-1],
+            new_user_content=original_user_query + ("\n\nSources:\n" + "\n".join(sources_content) if sources_content else ""),
+            max_tokens=self.chatgpt_token_limit - response_token_limit,
+            fallback_to_default=self.ALLOW_NON_GPT_MODELS,
+        )
+
+        data_points = {"text": sources_content}
+
+        extra_info["thoughts"].append(
+            ThoughtStep(
+                "Prompt to generate answer",
+                messages,
+                (
+                    {"model": self.chatgpt_model, "deployment": self.chatgpt_deployment}
+                    if self.chatgpt_deployment
+                    else {"model": self.chatgpt_model}
                 ),
-            ],
-        }
+            )
+        )
 
         chat_coroutine = self.openai_client.chat.completions.create(
-            # Azure OpenAI takes the deployment name as the model name
             model=self.chatgpt_deployment if self.chatgpt_deployment else self.chatgpt_model,
             messages=messages,
             temperature=overrides.get("temperature", 0.3),
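
End to end, the sentinel value short-circuits retrieval: no search-query generation, no Azure AI Search call, and no citation rules in the system prompt. A hedged usage sketch of the toggle; the /chat endpoint, local port, and payload shape are assumptions based on the app's existing chat API, not part of this diff:

    import httpx  # any HTTP client would do; httpx is used here for illustration

    payload = {
        "messages": [{"role": "user", "content": "What does my health plan cover?"}],
        "context": {
            "overrides": {
                # Sending the sentinel disables RAG for this request.
                "include_category": "__NONE__",
            }
        },
    }
    # Omitting include_category (or sending any other value) keeps RAG on, the default.
    print(httpx.post("http://localhost:50505/chat", json=payload).json())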
