
Commit 6bfb2cc

Migrate the Completion API to the Chat Completion API (#419)
Ports the Completion API calls over to the ChatCompletion API.
1 parent 1a8a1ce commit 6bfb2cc
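In outline, every openai.Completion.create call over a flat prompt string becomes an openai.ChatCompletion.create call over a role-tagged message list. A minimal sketch of the shape of the change, assuming the pre-1.0 openai Python SDK used throughout this diff; the deployment names and prompt text here are hypothetical:

import openai

# Before: prompt string against a text-completion deployment
completion = openai.Completion.create(
    engine="my-davinci-deployment",  # hypothetical Azure OpenAI deployment
    prompt="Generate a search query for: does my plan cover cardio?",
    temperature=0.0, max_tokens=32, n=1, stop=["\n"])
print(completion.choices[0].text)

# After: structured messages against a ChatGPT deployment
chat_completion = openai.ChatCompletion.create(
    deployment_id="my-chat-deployment",  # hypothetical Azure OpenAI deployment
    model="gpt-35-turbo",
    messages=[
        {"role": "system", "content": "You turn questions into search queries."},
        {"role": "user", "content": "does my plan cover cardio?"}],
    temperature=0.0, max_tokens=32, n=1)
print(chat_completion.choices[0].message.content)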

File tree: 5 files changed, +172 additions, −115 deletions


app/backend/app.py

Lines changed: 2 additions & 2 deletions
@@ -56,13 +56,13 @@
 # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
 # or some derivative, here we include several for exploration purposes
 ask_approaches = {
-    "rtr": RetrieveThenReadApproach(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
+    "rtr": RetrieveThenReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
     "rrr": ReadRetrieveReadApproach(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT),
     "rda": ReadDecomposeAsk(search_client, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
 }
 
 chat_approaches = {
-    "rrr": ChatReadRetrieveReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
+    "rrr": ChatReadRetrieveReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
 }
 
 app = Flask(__name__)
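For context, these dictionaries are consumed by the Flask routes in the same file (not part of this diff); a sketch of that dispatch, with the route body and error handling abbreviated:

from flask import request, jsonify

@app.route("/chat", methods=["POST"])
def chat():
    approach = request.json["approach"]
    impl = chat_approaches.get(approach)
    if not impl:
        return jsonify({"error": "unknown approach"}), 400
    # run() receives the chat history plus per-request overrides;
    # this commit's constructor changes feed into that call
    r = impl.run(request.json["history"], request.json.get("overrides") or {})
    return jsonify(r)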

app/backend/approaches/chatreadretrieveread.py

Lines changed: 68 additions & 88 deletions
@@ -7,6 +7,9 @@
 from approaches.approach import Approach
 from text import nonewlines
 
+from core.messagebuilder import MessageBuilder
+from core.modelhelper import get_token_limit
+
 class ChatReadRetrieveReadApproach(Approach):
     # Chat roles
     SYSTEM = "system"
@@ -34,41 +37,53 @@ class ChatReadRetrieveReadApproach(Approach):
 Generate a search query based on the conversation and the new question.
 Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
 Do not include any text inside [] or <<>> in the search query terms.
+Do not include any special characters like '+'.
 If the question is not in English, translate the question to English before generating the search query.
-Chat History:
-{chat_history}
-
-Question:
-{question}
-
-Search query:
+Search Query:
 """
-
-    def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, gpt_deployment: str, sourcepage_field: str, content_field: str):
+    query_prompt_few_shots = [
+        {'role' : USER, 'content' : 'What are my health plans?' },
+        {'role' : ASSISTANT, 'content' : 'Show available health plans' },
+        {'role' : USER, 'content' : 'does my plan cover cardio?' },
+        {'role' : ASSISTANT, 'content' : 'Health plan cardio coverage' }
+    ]
+
+    def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, sourcepage_field: str, content_field: str):
         self.search_client = search_client
         self.chatgpt_deployment = chatgpt_deployment
         self.chatgpt_model = chatgpt_model
-        self.gpt_deployment = gpt_deployment
         self.sourcepage_field = sourcepage_field
         self.content_field = content_field
+        self.chatgpt_token_limit = get_token_limit(chatgpt_model)
 
     def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
         use_semantic_captions = True if overrides.get("semantic_captions") else False
         top = overrides.get("top") or 3
        exclude_category = overrides.get("exclude_category") or None
         filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
 
+        user_q = 'Generate search query for: ' + history[-1]["user"]
+
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
-        prompt = self.query_prompt_template.format(chat_history=self.get_chat_history_as_text(history, include_last_turn=False), question=history[-1]["user"])
-        completion = openai.Completion.create(
-            engine=self.gpt_deployment,
-            prompt=prompt,
+        messages = self.get_messages_from_history(
+            self.query_prompt_template,
+            self.chatgpt_model,
+            history,
+            user_q,
+            self.query_prompt_few_shots,
+            self.chatgpt_token_limit - len(user_q)
+            )
+
+        chat_completion = openai.ChatCompletion.create(
+            deployment_id=self.chatgpt_deployment,
+            model=self.chatgpt_model,
+            messages=messages,
             temperature=0.0,
             max_tokens=32,
-            n=1,
-            stop=["\n"])
-        q = completion.choices[0].text
+            n=1)
+
+        q = chat_completion.choices[0].message.content
 
         # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
         if overrides.get("semantic_ranker"):
@@ -90,94 +105,59 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> Any:
 
         follow_up_questions_prompt = self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
 
+        # STEP 3: Generate a contextual and content specific answer using the search results and chat history
+
         # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
-        prompt_override = overrides.get("prompt_template")
-        messages = self.get_messages_from_history(prompt_override=prompt_override, follow_up_questions_prompt=follow_up_questions_prompt,history=history, sources=content)
+        prompt_override = overrides.get("prompt_override")
+        if prompt_override is None:
+            system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
+        elif prompt_override.startswith(">>>"):
+            system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
+        else:
+            system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
+
+        # latest conversation
+        user_content = history[-1]["user"] + " \nSources:" + content
+
+        messages = self.get_messages_from_history(
+            system_message,
+            self.chatgpt_model,
+            history,
+            user_content,
+            max_tokens=self.chatgpt_token_limit)
 
-        # STEP 3: Generate a contextual and content specific answer using the search results and chat history
         chat_completion = openai.ChatCompletion.create(
             deployment_id=self.chatgpt_deployment,
             model=self.chatgpt_model,
             messages=messages,
             temperature=overrides.get("temperature") or 0.7,
             max_tokens=1024,
             n=1)
 
         chat_content = chat_completion.choices[0].message.content
 
         msg_to_display = '\n\n'.join([str(message) for message in messages])
 
         return {"data_points": results, "answer": chat_content, "thoughts": f"Searched for:<br>{q}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>')}
 
-    def get_chat_history_as_text(self, history: Sequence[dict[str, str]], include_last_turn: bool=True, approx_max_tokens: int=1000) -> str:
-        history_text = ""
-        for h in reversed(history if include_last_turn else history[:-1]):
-            history_text = """<|im_start|>user""" + "\n" + h["user"] + "\n" + """<|im_end|>""" + "\n" + """<|im_start|>assistant""" + "\n" + (h.get("bot", "") + """<|im_end|>""" if h.get("bot") else "") + "\n" + history_text
-            if len(history_text) > approx_max_tokens*4:
-                break
-        return history_text
-
-    def get_messages_from_history(self, prompt_override, follow_up_questions_prompt, history: Sequence[dict[str, str]], sources: str, approx_max_tokens: int = 1000) -> []:
-        '''
-        Generate messages needed for chat Completion api
-        '''
-        messages = []
-        token_count = 0
-        if prompt_override is None:
-            system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
-        elif prompt_override.startswith(">>>"):
-            system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
-        else:
-            system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
+    def get_messages_from_history(self, system_prompt: str, model_id: str, history: Sequence[dict[str, str]], user_conv: str, few_shots = [], max_tokens: int = 4096) -> []:
+        message_builder = MessageBuilder(system_prompt, model_id)
+
+        # Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
+        for shot in few_shots:
+            message_builder.append_message(shot.get('role'), shot.get('content'))
+
+        user_content = user_conv
+        append_index = len(few_shots) + 1
+
+        message_builder.append_message(self.USER, user_content, index=append_index)
 
-        messages.append({"role":self.SYSTEM, "content": system_message})
-        token_count += self.num_tokens_from_messages(messages[-1], self.chatgpt_model)
-
-        # latest conversation
-        user_content = history[-1]["user"] + " \nSources:" + sources
-        messages.append({"role": self.USER, "content": user_content})
-        token_count += token_count + self.num_tokens_from_messages(messages[-1], self.chatgpt_model)
-
-        '''
-        Enqueue in reverse order
-        if limit exceeds truncate old messages
-        leaving system message behind
-        Keep track of token count for each conversation
-        If token count exceeds limit, break
-        '''
         for h in reversed(history[:-1]):
             if h.get("bot"):
-                messages.insert(1, {"role": self.ASSISTANT, "content" : h.get("bot")})
-                token_count += self.num_tokens_from_messages(messages[1], self.chatgpt_model)
-            messages.insert(1, {"role": self.USER, "content" : h.get("user")})
-            token_count += self.num_tokens_from_messages(messages[1], self.chatgpt_model)
-            if token_count > approx_max_tokens*4:
+                message_builder.append_message(self.ASSISTANT, h.get('bot'), index=append_index)
+            message_builder.append_message(self.USER, h.get('user'), index=append_index)
+            if message_builder.token_length > max_tokens:
                 break
-        return messages
-
-    def num_tokens_from_messages(self, message: dict[str,str], model: str) -> int:
-        """
-        Calculate the number of tokens required to encode a message.
-        Args:
-            message (dict): The message to encode, represented as a dictionary.
-            model (str): The name of the model to use for encoding.
-        Returns:
-            int: The total number of tokens required to encode the message.
-        Example:
-            message = {'role': 'user', 'content': 'Hello, how are you?'}
-            model = 'gpt-3.5-turbo'
-            num_tokens_from_messages(message, model)
-            output: 11
-        """
-        encoding = tiktoken.encoding_for_model(self.get_oai_chatmodel_tiktok(model))
-        num_tokens = 0
-        num_tokens += 2 # For "role" and "content" keys
-        for key, value in message.items():
-            num_tokens += len(encoding.encode(value))
-        return num_tokens
-
-    def get_oai_chatmodel_tiktok(self, aoaimodel: str):
-        if aoaimodel == "" or aoaimodel is None:
-            raise Exception("Expected AOAI chatGPT model name")
 
-        return "gpt-3.5-turbo" if aoaimodel == "gpt-35-turbo" else aoaimodel
+        messages = message_builder.messages
+        return messages
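Taken together, the migrated run() can be driven like this. A sketch only: the endpoint, index, key, and deployment names are placeholders, Azure OpenAI configuration (openai.api_type/api_base/api_key) is assumed to be set elsewhere, and history uses the 'user'/'bot' keys the approach expects:

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

search_client = SearchClient("https://<service>.search.windows.net",
                             "<index>", AzureKeyCredential("<key>"))
approach = ChatReadRetrieveReadApproach(
    search_client,
    "<chatgpt-deployment>",   # Azure OpenAI ChatGPT deployment name
    "gpt-35-turbo",           # model id, drives get_token_limit()
    "sourcepage", "content")  # placeholder index field names

history = [
    {"user": "What are my health plans?", "bot": "You have Northwind Standard and Plus."},
    {"user": "does my plan cover cardio?"}]  # newest turn, no "bot" answer yet

result = approach.run(history, {"top": 3, "semantic_ranker": True})
print(result["answer"])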
app/backend/approaches/retrievethenread.py

Lines changed: 28 additions & 25 deletions
@@ -1,10 +1,12 @@
 import openai
+
 from approaches.approach import Approach
 from azure.search.documents import SearchClient
 from azure.search.documents.models import QueryType
 from text import nonewlines
 from typing import Any
 
+from core.messagebuilder import MessageBuilder
 
 class RetrieveThenReadApproach(Approach):
     """
@@ -13,39 +15,30 @@ class RetrieveThenReadApproach(Approach):
     (answer) with that prompt.
     """
 
-    template = \
+    system_chat_template = \
         "You are an intelligent assistant helping Contoso Inc employees with their healthcare plan questions and employee handbook questions. " + \
         "Use 'you' to refer to the individual asking the questions even if they ask with 'I'. " + \
         "Answer the following question using only the data provided in the sources below. " + \
         "For tabular information return it as an html table. Do not return markdown format. " + \
         "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. " + \
-        "If you cannot answer using the sources below, say you don't know. " + \
-        """
+        "If you cannot answer using the sources below, say you don't know. Use below example to answer"
 
-###
-Question: 'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'
+    #shots/sample conversation
+    question = """
+'What is the deductible for the employee plan for a visit to Overlake in Bellevue?'
 
 Sources:
 info1.txt: deductibles depend on whether you are in-network or out-of-network. In-network deductibles are $500 for employee and $1000 for family. Out-of-network deductibles are $1000 for employee and $2000 for family.
 info2.pdf: Overlake is in-network for the employee plan.
 info3.pdf: Overlake is the name of the area that includes a park and ride near Bellevue.
 info4.pdf: In-network institutions include Overlake, Swedish and others in the region
-
-Answer:
-In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf].
-
-###
-Question: '{q}'?
-
-Sources:
-{retrieved}
-
-Answer:
 """
+    answer = "In-network deductibles are $500 for employee and $1000 for family [info1.txt] and Overlake is in-network for the employee plan [info2.pdf][info4.pdf]."
 
-    def __init__(self, search_client: SearchClient, openai_deployment: str, sourcepage_field: str, content_field: str):
+    def __init__(self, search_client: SearchClient, openai_deployment: str, chatgpt_model: str, sourcepage_field: str, content_field: str):
         self.search_client = search_client
         self.openai_deployment = openai_deployment
+        self.chatgpt_model = chatgpt_model
         self.sourcepage_field = sourcepage_field
         self.content_field = content_field
 
@@ -72,13 +65,23 @@ def run(self, q: str, overrides: dict[str, Any]) -> Any:
         results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) for doc in r]
         content = "\n".join(results)
 
-        prompt = (overrides.get("prompt_template") or self.template).format(q=q, retrieved=content)
-        completion = openai.Completion.create(
-            engine=self.openai_deployment,
-            prompt=prompt,
+        message_builder = MessageBuilder(overrides.get("prompt_template") or self.system_chat_template, self.chatgpt_model);
+
+        # add user question
+        user_content = q + "\n" + "Sources:\n {content}".format(content=content)
+        message_builder.append_message('user', user_content)
+
+        # Add shots/samples. This helps model to mimic response and make sure they match rules laid out in system message.
+        message_builder.append_message('assistant', self.answer)
+        message_builder.append_message('user', self.question)
+
+        messages = message_builder.messages
+        chat_completion = openai.ChatCompletion.create(
+            deployment_id=self.openai_deployment,
+            model=self.chatgpt_model,
+            messages=messages,
             temperature=overrides.get("temperature") or 0.3,
             max_tokens=1024,
-            n=1,
-            stop=["\n"])
-
-        return {"data_points": results, "answer": completion.choices[0].text, "thoughts": f"Question:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>')}
+            n=1)
+
+        return {"data_points": results, "answer": chat_completion.choices[0].message.content, "thoughts": f"Question:<br>{q}<br><br>Prompt:<br>" + '\n\n'.join([str(message) for message in messages])}
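One non-obvious detail above: MessageBuilder.append_message inserts at index 1 by default, so the assistant shot is added before the user shot in order to end up after it in the final list. A quick sanity check of the resulting ordering, a sketch reusing the classes from this commit:

from core.messagebuilder import MessageBuilder
from approaches.retrievethenread import RetrieveThenReadApproach

mb = MessageBuilder(RetrieveThenReadApproach.system_chat_template, "gpt-35-turbo")
mb.append_message('user', "actual question\nSources:\n ...")     # lands at index 1
mb.append_message('assistant', RetrieveThenReadApproach.answer)  # pushes the above down
mb.append_message('user', RetrieveThenReadApproach.question)     # ends up first after system

print([m['role'] for m in mb.messages])
# ['system', 'user', 'assistant', 'user'] -> shot question, shot answer, real question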

app/backend/core/messagebuilder.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+from core.modelhelper import num_tokens_from_messages
+
+
+class MessageBuilder:
+    """
+    A class for building and managing messages in a chat conversation.
+    Attributes:
+        message (list): A list of dictionaries representing chat messages.
+        model (str): The name of the ChatGPT model.
+        token_count (int): The total number of tokens in the conversation.
+    Methods:
+        __init__(self, system_content: str, chatgpt_model: str): Initializes the MessageBuilder instance.
+        append_message(self, role: str, content: str, index: int = 1): Appends a new message to the conversation.
+    """
+
+    def __init__(self, system_content: str, chatgpt_model: str):
+        self.messages = [{'role': 'system', 'content': system_content}]
+        self.model = chatgpt_model
+        self.token_length = num_tokens_from_messages(
+            self.messages[-1], self.model)
+
+    def append_message(self, role: str, content: str, index: int = 1):
+        self.messages.insert(index, {'role': role, 'content': content})
+        self.token_length += num_tokens_from_messages(
+            self.messages[index], self.model)
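The builder keeps a running token_length so callers can trim history against a budget, which is exactly how get_messages_from_history above uses it. A minimal sketch of that pattern, with a hypothetical 100-token budget:

from core.messagebuilder import MessageBuilder

mb = MessageBuilder("You are a helpful assistant.", "gpt-35-turbo")

# The newest user turn is pinned right after the system message; older turns
# are then inserted at the same index, so the oldest messages are the first
# to be cut off when the budget runs out.
append_index = 1
mb.append_message('user', 'does my plan cover cardio?', index=append_index)

older_turns = [('user', 'What are my health plans?'),
               ('assistant', 'Show available health plans')]
for role, content in reversed(older_turns):
    if mb.token_length > 100:  # hypothetical budget
        break
    mb.append_message(role, content, index=append_index)

print(mb.token_length, [m['role'] for m in mb.messages])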

app/backend/core/modelhelper.py

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+import tiktoken
+
+MODELS_2_TOKEN_LIMITS = {
+    "gpt-35-turbo": 4000,
+    "gpt-3.5-turbo": 4000,
+    "gpt-35-turbo-16k": 16000,
+    "gpt-3.5-turbo-16k": 16000,
+    "gpt-4": 8100,
+    "gpt-4-32k": 32000
+}
+
+AOAI_2_OAI = {
+    "gpt-35-turbo": "gpt-3.5-turbo",
+    "gpt-35-turbo-16k": "gpt-3.5-turbo-16k"
+}
+
+
+def get_token_limit(model_id: str) -> int:
+    if model_id not in MODELS_2_TOKEN_LIMITS:
+        raise ValueError("Expected Model Gpt-35-turbo and above")
+    return MODELS_2_TOKEN_LIMITS.get(model_id)
+
+
+def num_tokens_from_messages(message: dict[str, str], model: str) -> int:
+    """
+    Calculate the number of tokens required to encode a message.
+    Args:
+        message (dict): The message to encode, represented as a dictionary.
+        model (str): The name of the model to use for encoding.
+    Returns:
+        int: The total number of tokens required to encode the message.
+    Example:
+        message = {'role': 'user', 'content': 'Hello, how are you?'}
+        model = 'gpt-3.5-turbo'
+        num_tokens_from_messages(message, model)
+        output: 11
+    """
+    encoding = tiktoken.encoding_for_model(get_oai_chatmodel_tiktok(model))
+    num_tokens = 2  # For "role" and "content" keys
+    for key, value in message.items():
+        num_tokens += len(encoding.encode(value))
+    return num_tokens
+
+
+def get_oai_chatmodel_tiktok(aoaimodel: str) -> str:
+    if aoaimodel == "" or aoaimodel is None:
+        raise ValueError("Expected AOAI chatGPT model name")
+
+    return AOAI_2_OAI.get(aoaimodel)
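Usage of the two helpers, for an Azure model alias that AOAI_2_OAI can resolve; note that, as written, get_oai_chatmodel_tiktok returns None for names outside that map (e.g. the OpenAI-native "gpt-3.5-turbo"), so callers are expected to pass the AOAI-style names:

from core.modelhelper import get_token_limit, num_tokens_from_messages

limit = get_token_limit("gpt-35-turbo")  # 4000, from MODELS_2_TOKEN_LIMITS
message = {'role': 'user', 'content': 'Hello, how are you?'}
used = num_tokens_from_messages(message, "gpt-35-turbo")
print(f"{used} tokens used, {limit - used} left in the context window")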
