@@ -58,7 +58,7 @@ def system_message_chat_conversation(self):
5858 return """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
5959 Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
6060 If the question is not in English, answer in the language used in the question.
61- Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
61+ {sources_reference_content}
6262 {follow_up_questions_prompt}
6363 {injected_prompt}
6464 """
@@ -96,106 +96,89 @@ async def run_until_final_call(
9696 top = overrides .get ("top" , 3 )
9797 minimum_search_score = overrides .get ("minimum_search_score" , 0.0 )
9898 minimum_reranker_score = overrides .get ("minimum_reranker_score" , 0.0 )
99+ include_category = overrides .get ("include_category" )
99100 filter = self .build_filter (overrides , auth_claims )
100101
101102 original_user_query = messages [- 1 ]["content" ]
102103 if not isinstance (original_user_query , str ):
103104 raise ValueError ("The most recent message content must be a string." )
104105 user_query_request = "Generate search query for: " + original_user_query
105106
106- tools : List [ChatCompletionToolParam ] = [
107- {
108- "type" : "function" ,
109- "function" : {
110- "name" : "search_sources" ,
111- "description" : "Retrieve sources from the Azure AI Search index" ,
112- "parameters" : {
113- "type" : "object" ,
114- "properties" : {
115- "search_query" : {
116- "type" : "string" ,
117- "description" : "Query string to retrieve documents from azure search eg: 'Health care plan'" ,
118- }
107+ sources_content = []
108+ extra_info = {"thoughts" : [], 'data_points' : []}
109+
110+ if include_category != "__NONE__" :
111+ tools : List [ChatCompletionToolParam ] = [
112+ {
113+ "type" : "function" ,
114+ "function" : {
115+ "name" : "search_sources" ,
116+ "description" : "Retrieve sources from the Azure AI Search index" ,
117+ "parameters" : {
118+ "type" : "object" ,
119+ "properties" : {
120+ "search_query" : {
121+ "type" : "string" ,
122+ "description" : "Query string to retrieve documents from azure search eg: 'Health care plan'" ,
123+ }
124+ },
125+ "required" : ["search_query" ],
119126 },
120- "required" : ["search_query" ],
121127 },
122- },
123- }
124- ]
128+ }
129+ ]
125130
126- # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
127- query_response_token_limit = 100
128- query_messages = build_messages (
129- model = self .chatgpt_model ,
130- system_prompt = self .query_prompt_template ,
131- tools = tools ,
132- few_shots = self .query_prompt_few_shots ,
133- past_messages = messages [:- 1 ],
134- new_user_content = user_query_request ,
135- max_tokens = self .chatgpt_token_limit - query_response_token_limit ,
136- fallback_to_default = self .ALLOW_NON_GPT_MODELS ,
137- )
131+ # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
132+ query_response_token_limit = 100
133+ query_messages = build_messages (
134+ model = self .chatgpt_model ,
135+ system_prompt = self .query_prompt_template ,
136+ tools = tools ,
137+ few_shots = self .query_prompt_few_shots ,
138+ past_messages = messages [:- 1 ],
139+ new_user_content = user_query_request ,
140+ max_tokens = self .chatgpt_token_limit - query_response_token_limit ,
141+ fallback_to_default = self .ALLOW_NON_GPT_MODELS ,
142+ )
138143
139- chat_completion : ChatCompletion = await self .openai_client .chat .completions .create (
140- messages = query_messages , # type: ignore
141- # Azure OpenAI takes the deployment name as the model name
142- model = self .chatgpt_deployment if self .chatgpt_deployment else self .chatgpt_model ,
143- temperature = 0.0 , # Minimize creativity for search query generation
144- max_tokens = query_response_token_limit , # Setting too low risks malformed JSON, setting too high may affect performance
145- n = 1 ,
146- tools = tools ,
147- seed = seed ,
148- )
144+ chat_completion : ChatCompletion = await self .openai_client .chat .completions .create (
145+ messages = query_messages , # type: ignore
146+ # Azure OpenAI takes the deployment name as the model name
147+ model = self .chatgpt_deployment if self .chatgpt_deployment else self .chatgpt_model ,
148+ temperature = 0.0 , # Minimize creativity for search query generation
149+ max_tokens = query_response_token_limit , # Setting too low risks malformed JSON, setting too high may affect performance
150+ n = 1 ,
151+ tools = tools ,
152+ seed = seed ,
153+ )
149154
150- query_text = self .get_search_query (chat_completion , original_user_query )
151-
152- # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
153-
154- # If retrieval mode includes vectors, compute an embedding for the query
155- vectors : list [VectorQuery ] = []
156- if use_vector_search :
157- vectors .append (await self .compute_text_embedding (query_text ))
158-
159- results = await self .search (
160- top ,
161- query_text ,
162- filter ,
163- vectors ,
164- use_text_search ,
165- use_vector_search ,
166- use_semantic_ranker ,
167- use_semantic_captions ,
168- minimum_search_score ,
169- minimum_reranker_score ,
170- )
171-
172- sources_content = self .get_sources_content (results , use_semantic_captions , use_image_citation = False )
173- content = "\n " .join (sources_content )
155+ query_text = self .get_search_query (chat_completion , original_user_query )
174156
175- # STEP 3: Generate a contextual and content specific answer using the search results and chat history
157+ # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
176158
177- # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
178- system_message = self .get_system_prompt (
179- overrides .get ("prompt_template" ),
180- self .follow_up_questions_prompt_content if overrides .get ("suggest_followup_questions" ) else "" ,
181- )
159+ # If retrieval mode includes vectors, compute an embedding for the query
160+ vectors : list [VectorQuery ] = []
161+ if use_vector_search :
162+ vectors .append (await self .compute_text_embedding (query_text ))
182163
183- response_token_limit = 1024
184- messages = build_messages (
185- model = self .chatgpt_model ,
186- system_prompt = system_message ,
187- past_messages = messages [:- 1 ],
188- # Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
189- new_user_content = original_user_query + "\n \n Sources:\n " + content ,
190- max_tokens = self .chatgpt_token_limit - response_token_limit ,
191- fallback_to_default = self .ALLOW_NON_GPT_MODELS ,
192- )
164+ results = await self .search (
165+ top ,
166+ query_text ,
167+ filter ,
168+ vectors ,
169+ use_text_search ,
170+ use_vector_search ,
171+ use_semantic_ranker ,
172+ use_semantic_captions ,
173+ minimum_search_score ,
174+ minimum_reranker_score ,
175+ )
193176
194- data_points = {"text" : sources_content }
177+ sources_content = self .get_sources_content (results , use_semantic_captions , use_image_citation = False )
178+ if sources_content :
179+ extra_info ["data_points" ] = {"text" : sources_content }
195180
196- extra_info = {
197- "data_points" : data_points ,
198- "thoughts" : [
181+ extra_info ["thoughts" ].extend ([
199182 ThoughtStep (
200183 "Prompt to generate search query" ,
201184 query_messages ,
@@ -221,20 +204,47 @@ async def run_until_final_call(
221204 "Search results" ,
222205 [result .serialize_for_results () for result in results ],
223206 ),
224- ThoughtStep (
225- "Prompt to generate answer" ,
226- messages ,
227- (
228- {"model" : self .chatgpt_model , "deployment" : self .chatgpt_deployment }
229- if self .chatgpt_deployment
230- else {"model" : self .chatgpt_model }
231- ),
207+ ])
208+
209+ # STEP 3: Generate a contextual and content specific answer
210+
211+ # Additional prompt injected into the master prompt if RAG is enabled
212+ sources_reference_content = """
213+ Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, for example [info1.txt]. Don't combine sources, list each source separately, for example [info1.txt][info2.pdf].
214+ """ if include_category != "__NONE__" else ""
215+
216+ # Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
217+ system_message = self .get_system_prompt (
218+ overrides .get ("prompt_template" ),
219+ self .follow_up_questions_prompt_content if overrides .get ("suggest_followup_questions" ) else "" ,
220+ sources_reference_content = sources_reference_content
221+ )
222+
223+ response_token_limit = 1024
224+ messages = build_messages (
225+ model = self .chatgpt_model ,
226+ system_prompt = system_message ,
227+ past_messages = messages [:- 1 ],
228+ new_user_content = original_user_query + ("\n \n Sources:\n " + "\n " .join (sources_content ) if sources_content else "" ),
229+ max_tokens = self .chatgpt_token_limit - response_token_limit ,
230+ fallback_to_default = self .ALLOW_NON_GPT_MODELS ,
231+ )
232+
233+ data_points = {"text" : sources_content }
234+
235+ extra_info ["thoughts" ].append (
236+ ThoughtStep (
237+ "Prompt to generate answer" ,
238+ messages ,
239+ (
240+ {"model" : self .chatgpt_model , "deployment" : self .chatgpt_deployment }
241+ if self .chatgpt_deployment
242+ else {"model" : self .chatgpt_model }
232243 ),
233- ],
234- }
244+ )
245+ )
235246
236247 chat_coroutine = self .openai_client .chat .completions .create (
237- # Azure OpenAI takes the deployment name as the model name
238248 model = self .chatgpt_deployment if self .chatgpt_deployment else self .chatgpt_model ,
239249 messages = messages ,
240250 temperature = overrides .get ("temperature" , 0.3 ),
0 commit comments