Skip to content

Commit 4063ad6

Browse files
authored
Merge pull request #385 from Azure-Samples/srbalakr/chatCompletion5.7
Upgrade sample to use ChatCompletion API
2 parents 889f77b + 34221ef commit 4063ad6

File tree

3 files changed

+88
-24
lines changed

3 files changed

+88
-24
lines changed

app/backend/app.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
AZURE_OPENAI_SERVICE = os.environ.get("AZURE_OPENAI_SERVICE") or "myopenai"
2222
AZURE_OPENAI_GPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_GPT_DEPLOYMENT") or "davinci"
2323
AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get("AZURE_OPENAI_CHATGPT_DEPLOYMENT") or "chat"
24+
AZURE_OPENAI_CHATGPT_MODEL = os.environ.get("AZURE_OPENAI_CHATGPT_MODEL") or "gpt-35-turbo"
2425

2526
KB_FIELDS_CONTENT = os.environ.get("KB_FIELDS_CONTENT") or "content"
2627
KB_FIELDS_CATEGORY = os.environ.get("KB_FIELDS_CATEGORY") or "category"
@@ -35,7 +36,7 @@
3536
# Used by the OpenAI SDK
3637
openai.api_type = "azure"
3738
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
38-
openai.api_version = "2022-12-01"
39+
openai.api_version = "2023-05-15"
3940

4041
# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead
4142
openai.api_type = "azure_ad"
@@ -61,7 +62,7 @@
6162
}
6263

6364
chat_approaches = {
64-
"rrr": ChatReadRetrieveReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
65+
"rrr": ChatReadRetrieveReadApproach(search_client, AZURE_OPENAI_CHATGPT_DEPLOYMENT, AZURE_OPENAI_CHATGPT_MODEL, AZURE_OPENAI_GPT_DEPLOYMENT, KB_FIELDS_SOURCEPAGE, KB_FIELDS_CONTENT)
6566
}
6667

6768
app = Flask(__name__)

app/backend/approaches/chatreadretrieveread.py

Lines changed: 83 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,30 @@
11
from typing import Any, Sequence
22

33
import openai
4+
import tiktoken
45
from azure.search.documents import SearchClient
56
from azure.search.documents.models import QueryType
67
from approaches.approach import Approach
78
from text import nonewlines
89

910
class ChatReadRetrieveReadApproach(Approach):
11+
# Chat roles
12+
SYSTEM = "system"
13+
USER = "user"
14+
ASSISTANT = "assistant"
15+
1016
"""
1117
Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
1218
top documents from search, then constructs a prompt with them, and then uses OpenAI to generate a completion
1319
(answer) with that prompt.
1420
"""
15-
16-
prompt_prefix = """<|im_start|>system
17-
Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
21+
system_message_chat_conversation = """Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.
1822
Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.
1923
For tabular information return it as an html table. Do not return markdown format.
2024
Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].
2125
{follow_up_questions_prompt}
2226
{injected_prompt}
23-
Sources:
24-
{sources}
25-
<|im_end|>
26-
{chat_history}
2727
"""
28-
2928
follow_up_questions_prompt_content = """Generate three very brief follow-up questions that the user would likely ask next about their healthcare plan and employee handbook.
3029
Use double angle brackets to reference the questions, e.g. <<Are there exclusions for prescriptions?>>.
3130
Try not to repeat questions that have already been asked.
@@ -46,9 +45,10 @@ class ChatReadRetrieveReadApproach(Approach):
4645
Search query:
4746
"""
4847

49-
def __init__(self, search_client: SearchClient, chatgpt_deployment: str, gpt_deployment: str, sourcepage_field: str, content_field: str):
48+
def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, gpt_deployment: str, sourcepage_field: str, content_field: str):
5049
self.search_client = search_client
5150
self.chatgpt_deployment = chatgpt_deployment
51+
self.chatgpt_model = chatgpt_model
5252
self.gpt_deployment = gpt_deployment
5353
self.sourcepage_field = sourcepage_field
5454
self.content_field = content_field
@@ -92,23 +92,20 @@ def run(self, history: Sequence[dict[str, str]], overrides: dict[str, Any]) -> A
9292

9393
# Allow client to replace the entire prompt, or to inject into the existing prompt using >>>
9494
prompt_override = overrides.get("prompt_template")
95-
if prompt_override is None:
96-
prompt = self.prompt_prefix.format(injected_prompt="", sources=content, chat_history=self.get_chat_history_as_text(history), follow_up_questions_prompt=follow_up_questions_prompt)
97-
elif prompt_override.startswith(">>>"):
98-
prompt = self.prompt_prefix.format(injected_prompt=prompt_override[3:] + "\n", sources=content, chat_history=self.get_chat_history_as_text(history), follow_up_questions_prompt=follow_up_questions_prompt)
99-
else:
100-
prompt = prompt_override.format(sources=content, chat_history=self.get_chat_history_as_text(history), follow_up_questions_prompt=follow_up_questions_prompt)
95+
messages = self.get_messages_from_history(prompt_override=prompt_override, follow_up_questions_prompt=follow_up_questions_prompt,history=history, sources=content)
10196

10297
# STEP 3: Generate a contextual and content specific answer using the search results and chat history
103-
completion = openai.Completion.create(
104-
engine=self.chatgpt_deployment,
105-
prompt=prompt,
98+
chat_completion = openai.ChatCompletion.create(
99+
deployment_id=self.chatgpt_deployment,
100+
model=self.chatgpt_model,
101+
messages=messages,
106102
temperature=overrides.get("temperature") or 0.7,
107103
max_tokens=1024,
108-
n=1,
109-
stop=["<|im_end|>", "<|im_start|>"])
104+
n=1)
105+
106+
chat_content = chat_completion.choices[0].message.content
110107

111-
return {"data_points": results, "answer": completion.choices[0].text, "thoughts": f"Searched for:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>')}
108+
return {"data_points": results, "answer": chat_content, "thoughts": f"Searched for:<br>{q}<br><br>Prompt:<br>" + prompt.replace('\n', '<br>')}
112109

113110
def get_chat_history_as_text(self, history: Sequence[dict[str, str]], include_last_turn: bool=True, approx_max_tokens: int=1000) -> str:
114111
history_text = ""
@@ -117,3 +114,68 @@ def get_chat_history_as_text(self, history: Sequence[dict[str, str]], include_la
117114
if len(history_text) > approx_max_tokens*4:
118115
break
119116
return history_text
117+
118+
def get_messages_from_history(self, prompt_override, follow_up_questions_prompt, history: Sequence[dict[str, str]], sources: str, approx_max_tokens: int = 1000) -> []:
119+
'''
120+
Generate the messages needed for the Chat Completion API
121+
'''
122+
messages = []
123+
token_count = 0
124+
if prompt_override is None:
125+
system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
126+
elif prompt_override.startswith(">>>"):
127+
system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
128+
else:
129+
system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
130+
131+
messages.append({"role":self.SYSTEM, "content": system_message})
132+
token_count += self.num_tokens_from_messages(messages[-1], self.chatgpt_model)
133+
134+
# latest conversation
135+
user_content = history[-1]["user"] + " \nSources:" + sources
136+
messages.append({"role": self.USER, "content": user_content})
137+
token_count += token_count + self.num_tokens_from_messages(messages[-1], self.chatgpt_model)
138+
139+
'''
140+
Enqueue in reverse order
141+
if the token limit is exceeded, truncate the oldest messages,
142+
keeping the system message in place
143+
Keep track of token count for each conversation
144+
If token count exceeds limit, break
145+
'''
146+
for h in reversed(history[:-1]):
147+
if h.get("bot"):
148+
messages.insert(1, {"role": self.ASSISTANT, "content" : h.get("bot")})
149+
token_count += self.num_tokens_from_messages(messages[1], self.chatgpt_model)
150+
messages.insert(1, {"role": self.USER, "content" : h.get("user")})
151+
token_count += self.num_tokens_from_messages(messages[1], self.chatgpt_model)
152+
if token_count > approx_max_tokens*4:
153+
break
154+
return messages
155+
156+
def num_tokens_from_messages(self, message: dict[str,str], model: str) -> int:
157+
"""
158+
Calculate the number of tokens required to encode a message.
159+
Args:
160+
message (dict): The message to encode, represented as a dictionary.
161+
model (str): The name of the model to use for encoding.
162+
Returns:
163+
int: The total number of tokens required to encode the message.
164+
Example:
165+
message = {'role': 'user', 'content': 'Hello, how are you?'}
166+
model = 'gpt-3.5-turbo'
167+
num_tokens_from_messages(message, model)
168+
output: 11
169+
"""
170+
encoding = tiktoken.encoding_for_model(self.get_oai_chatmodel_tiktok(model))
171+
num_tokens = 0
172+
num_tokens += 2 # For "role" and "content" keys
173+
for key, value in message.items():
174+
num_tokens += len(encoding.encode(value))
175+
return num_tokens
176+
177+
def get_oai_chatmodel_tiktok(self, aoaimodel: str):
178+
if aoaimodel == "" or aoaimodel is None:
179+
raise Exception("Expected AOAI chatGPT model name")
180+
181+
return "gpt-3.5-turbo" if aoaimodel == "gpt-35-turbo" else aoaimodel

app/backend/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
azure-identity==1.13.0
22
Flask==2.2.5
33
langchain==0.0.187
4-
openai==0.26.4
4+
openai==0.27.8
5+
tiktoken==0.3.0
56
azure-search-documents==11.4.0b3
67
azure-storage-blob==12.14.1

0 commit comments

Comments
 (0)