@@ -41,13 +41,21 @@ class ChatReadRetrieveReadApproach:
 If you cannot generate a search query, return just the number 0.
 """
     query_prompt_few_shots = [
-        {'role' : USER, 'content' : 'What are my health plans?'},
-        {'role' : ASSISTANT, 'content' : 'Show available health plans'},
-        {'role' : USER, 'content' : 'does my plan cover cardio?'},
-        {'role' : ASSISTANT, 'content' : 'Health plan cardio coverage'}
+        {"role": USER, "content": "What are my health plans?"},
+        {"role": ASSISTANT, "content": "Show available health plans"},
+        {"role": USER, "content": "does my plan cover cardio?"},
+        {"role": ASSISTANT, "content": "Health plan cardio coverage"},
     ]
 
-    def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, embedding_deployment: str, sourcepage_field: str, content_field: str):
+    def __init__(
+        self,
+        search_client: SearchClient,
+        chatgpt_deployment: str,
+        chatgpt_model: str,
+        embedding_deployment: str,
+        sourcepage_field: str,
+        content_field: str,
+    ):
         self.search_client = search_client
         self.chatgpt_deployment = chatgpt_deployment
         self.chatgpt_model = chatgpt_model
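
Note: the few-shot turns reformatted above are the examples that run_until_final_call later feeds to the query-generation request. Purely as an illustration (this is not code from the diff; the system prompt and the latest question are abbreviated placeholders), the messages they produce are shaped roughly like this:

query_generation_messages = [
    {"role": "system", "content": "<query_prompt_template, ending with: If you cannot generate a search query, return just the number 0.>"},
    {"role": "user", "content": "What are my health plans?"},
    {"role": "assistant", "content": "Show available health plans"},
    {"role": "user", "content": "does my plan cover cardio?"},
    {"role": "assistant", "content": "Health plan cardio coverage"},
    {"role": "user", "content": "Generate search query for: <latest user question>"},
]
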
@@ -56,15 +64,17 @@ def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt
         self.content_field = content_field
         self.chatgpt_token_limit = get_token_limit(chatgpt_model)
 
-    async def run_until_final_call(self, history: list[dict[str, str]], overrides: dict[str, Any], should_stream: bool = False) -> tuple:
+    async def run_until_final_call(
+        self, history: list[dict[str, str]], overrides: dict[str, Any], should_stream: bool = False
+    ) -> tuple:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
         top = overrides.get("top") or 3
         exclude_category = overrides.get("exclude_category") or None
         filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
 
-        user_q = 'Generate search query for: ' + history[-1]["user"]
+        user_q = "Generate search query for: " + history[-1]["user"]
 
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
         messages = self.get_messages_from_history(
@@ -73,89 +83,112 @@ async def run_until_final_call(self, history: list[dict[str, str]], overrides: d
             history,
             user_q,
             self.query_prompt_few_shots,
-            self.chatgpt_token_limit - len(user_q)
-            )
+            self.chatgpt_token_limit - len(user_q),
+        )
 
         chat_completion = await openai.ChatCompletion.acreate(
             deployment_id=self.chatgpt_deployment,
             model=self.chatgpt_model,
             messages=messages,
             temperature=0.0,
             max_tokens=32,
-            n=1)
+            n=1,
+        )
 
         query_text = chat_completion.choices[0].message.content
         if query_text.strip() == "0":
-            query_text = history[-1]["user"] # Use the last user input if we failed to generate a better query
+            query_text = history[-1]["user"]  # Use the last user input if we failed to generate a better query
 
         # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
 
         # If retrieval mode includes vectors, compute an embedding for the query
         if has_vector:
-            query_vector = (await openai.Embedding.acreate(engine=self.embedding_deployment, input=query_text))["data"][0]["embedding"]
+            embedding = await openai.Embedding.acreate(engine=self.embedding_deployment, input=query_text)
+            query_vector = embedding["data"][0]["embedding"]
         else:
             query_vector = None
 
-        # Only keep the text query if the retrieval mode uses text, otherwise drop it
+        # Only keep the text query if the retrieval mode uses text, otherwise drop it
         if not has_text:
             query_text = None
 
         # Use semantic L2 reranker if requested and if retrieval mode is text or hybrid (vectors + text)
         if overrides.get("semantic_ranker") and has_text:
-            r = await self.search_client.search(query_text,
-                                                filter=filter,
-                                                query_type=QueryType.SEMANTIC,
-                                                query_language="en-us",
-                                                query_speller="lexicon",
-                                                semantic_configuration_name="default",
-                                                top=top,
-                                                query_caption="extractive|highlight-false" if use_semantic_captions else None,
-                                                vector=query_vector,
-                                                top_k=50 if query_vector else None,
-                                                vector_fields="embedding" if query_vector else None)
+            r = await self.search_client.search(
+                query_text,
+                filter=filter,
+                query_type=QueryType.SEMANTIC,
+                query_language="en-us",
+                query_speller="lexicon",
+                semantic_configuration_name="default",
+                top=top,
+                query_caption="extractive|highlight-false" if use_semantic_captions else None,
+                vector=query_vector,
+                top_k=50 if query_vector else None,
+                vector_fields="embedding" if query_vector else None,
+            )
         else:
-            r = await self.search_client.search(query_text,
-                                                filter=filter,
-                                                top=top,
-                                                vector=query_vector,
-                                                top_k=50 if query_vector else None,
-                                                vector_fields="embedding" if query_vector else None)
+            r = await self.search_client.search(
+                query_text,
+                filter=filter,
+                top=top,
+                vector=query_vector,
+                top_k=50 if query_vector else None,
+                vector_fields="embedding" if query_vector else None,
+            )
         if use_semantic_captions:
-            results = [doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc['@search.captions']])) async for doc in r]
+            results = [
+                doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc["@search.captions"]]))
+                async for doc in r
+            ]
         else:
             results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) async for doc in r]
         content = "\n".join(results)
 
-        follow_up_questions_prompt = self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
+        follow_up_questions_prompt = (
+            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
+        )
 
         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
 
         # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
         prompt_override = overrides.get("prompt_template")
         if prompt_override is None:
-            system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
+            system_message = self.system_message_chat_conversation.format(
+                injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
+            )
         elif prompt_override.startswith(">>>"):
-            system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
+            system_message = self.system_message_chat_conversation.format(
+                injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
+            )
         else:
             system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
 
         messages = self.get_messages_from_history(
             system_message,
             self.chatgpt_model,
             history,
-            history[-1]["user"]+ "\n\nSources:\n" + content, # Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
-            max_tokens=self.chatgpt_token_limit)
-        msg_to_display = '\n\n'.join([str(message) for message in messages])
-
-        extra_info = {"data_points": results, "thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>')}
+            # Model does not handle lengthy system messages well.
+            # Moved sources to latest user conversation to solve follow up questions prompt.
+            history[-1]["user"] + "\n\nSources:\n" + content,
+            max_tokens=self.chatgpt_token_limit,
+        )
+        msg_to_display = "\n\n".join([str(message) for message in messages])
+
+        extra_info = {
+            "data_points": results,
+            "thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>"
+            + msg_to_display.replace("\n", "<br>"),
+        }
         chat_coroutine = openai.ChatCompletion.acreate(
-            deployment_id=self.chatgpt_deployment,
-            model=self.chatgpt_model,
-            messages=messages,
-            temperature=overrides.get("temperature") or 0.7,
-            max_tokens=1024,
-            n=1,
-            stream=should_stream)
+            deployment_id=self.chatgpt_deployment,
+            model=self.chatgpt_model,
+            messages=messages,
+            temperature=overrides.get("temperature") or 0.7,
+            max_tokens=1024,
+            n=1,
+            stream=should_stream,
+        )
         return (extra_info, chat_coroutine)
 
     async def run_without_streaming(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> dict[str, Any]:
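
Note: the filter passed to both search_client.search calls in this hunk is built earlier by the unchanged line filter = "category ne '{}'".format(exclude_category.replace("'", "''")), which escapes single quotes in the OData expression by doubling them. A minimal sketch of what that line produces (the category value here is invented for illustration):

exclude_category = "Bob's documents"
filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
print(filter)  # category ne 'Bob''s documents'
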
@@ -164,19 +197,29 @@ async def run_without_streaming(self, history: list[dict[str, str]], overrides:
         extra_info["answer"] = chat_content
         return extra_info
 
-    async def run_with_streaming(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> AsyncGenerator[dict, None]:
+    async def run_with_streaming(
+        self, history: list[dict[str, str]], overrides: dict[str, Any]
+    ) -> AsyncGenerator[dict, None]:
         extra_info, chat_coroutine = await self.run_until_final_call(history, overrides, should_stream=True)
         yield extra_info
         async for event in await chat_coroutine:
             yield event
 
-
-    def get_messages_from_history(self, system_prompt: str, model_id: str, history: list[dict[str, str]], user_conv: str, few_shots=[], max_tokens: int = 4096) -> list:
+    def get_messages_from_history(
+        self,
+        system_prompt: str,
+        model_id: str,
+        history: list[dict[str, str]],
+        user_conv: str,
+        few_shots=[],
+        max_tokens: int = 4096,
+    ) -> list:
         message_builder = MessageBuilder(system_prompt, model_id)
 
-        # Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
+        # Add examples to show the chat what responses we want.
+        # It will try to mimic any responses and make sure they match the rules laid out in the system message.
         for shot in few_shots:
-            message_builder.append_message(shot.get('role'), shot.get('content'))
+            message_builder.append_message(shot.get("role"), shot.get("content"))
 
         user_content = user_conv
         append_index = len(few_shots) + 1
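
Side note, outside the scope of this formatting-only change: the reformatted get_messages_from_history signature keeps the mutable default few_shots=[]. Leaving it untouched here is expected, but if the signature were ever revisited, a common alternative is a None default; the sketch below is purely illustrative and uses a hypothetical function name:

def example_with_safe_default(few_shots=None) -> list:
    # A None default avoids sharing one list object across every call.
    return [] if few_shots is None else few_shots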