20 | 20 | "AZURE_OPENAI_SERVICE = os.environ.get(\"AZURE_OPENAI_SERVICE\") or \"myopenai\"\n",
21 | 21 | "AZURE_OPENAI_GPT_DEPLOYMENT = os.environ.get(\"AZURE_OPENAI_GPT_DEPLOYMENT\") or \"davinci\"\n",
22 | 22 | "AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get(\"AZURE_OPENAI_CHATGPT_DEPLOYMENT\") or \"chat\"\n",
| 23 | + "AZURE_OPENAI_CHATGPT_MODEL = os.environ.get(\"AZURE_OPENAI_CHATGPT_MODEL\") or \"gpt-35-turbo\"\n",
23 | 24 | "\n",
24 | 25 | "KB_FIELDS_CONTENT = os.environ.get(\"KB_FIELDS_CONTENT\") or \"content\"\n",
25 | 26 | "KB_FIELDS_CATEGORY = os.environ.get(\"KB_FIELDS_CATEGORY\") or \"category\"\n",
33 | 34 | "# Used by the OpenAI SDK\n",
34 | 35 | "openai.api_type = \"azure\"\n",
35 | 36 | "openai.api_base = f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\"\n",
36 | | - "openai.api_version = \"2022-12-01\"\n",
| 37 | + "openai.api_version = \"2023-05-15\"\n",
37 | 38 | "\n",
38 | 39 | "# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead\n",
39 | 40 | "openai.api_type = \"azure_ad\"\n",
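The `azure_ad` branch expects a bearer token rather than a key, but the token-acquisition lines sit outside this hunk. A minimal sketch of what they typically look like with the `azure-identity` package (the `DefaultAzureCredential` usage is an assumption, not shown in this diff):

```python
# Assumed token setup for api_type "azure_ad" (not part of this diff):
# fetch an Azure AD token and hand it to the SDK as the api_key, which
# the "azure_ad" api_type sends as a bearer token.
import openai
from azure.identity import DefaultAzureCredential

credential = DefaultAzureCredential()
token = credential.get_token("https://cognitiveservices.azure.com/.default")
openai.api_key = token.token
```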
52 | 53 | "metadata": {},
53 | 54 | "outputs": [],
54 | 55 | "source": [
55 | | - "# ChatGPT uses a particular set of tokens to indicate turns in conversations\n",
56 | | - "prompt_prefix = \"\"\"<|im_start|>system\n",
57 | | - "Assistant helps the company employees with their healthcare plan questions and employee handbook questions. \n",
58 | | - "Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. \n",
59 | | - "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\n",
60 | | - "\n",
61 | | - "Sources:\n",
62 | | - "{sources}\n",
63 | | - "\n",
64 | | - "<|im_end|>\"\"\"\n",
65 | | - "\n",
66 | | - "turn_prefix = \"\"\"\n",
67 | | - "<|im_start|>user\n",
68 | | - "\"\"\"\n",
69 | | - "\n",
70 | | - "turn_suffix = \"\"\"\n",
71 | | - "<|im_end|>\n",
72 | | - "<|im_start|>assistant\n",
| 56 | + "# Chat roles\n",
| 57 | + "SYSTEM = \"system\"\n",
| 58 | + "USER = \"user\"\n",
| 59 | + "ASSISTANT = \"assistant\"\n",
| 60 | + "\n",
| 61 | + "system_message_chat_conversation = \"\"\"Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\n",
| 62 | + "Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\n",
| 63 | + "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\n",
73 | 64 | "\"\"\"\n",
74 | | - "\n",
75 | | - "prompt_history = turn_prefix\n",
76 | | - "\n",
77 | | - "history = []\n",
| 65 | + "chat_conversations = [{\"role\" : SYSTEM, \"content\" : system_message_chat_conversation}]\n",
78 | 66 | "\n",
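For readers comparing the two styles: each ChatML `<|im_start|>role ... <|im_end|>` block becomes one `{"role", "content"}` dict in the list. A sketch of the list after one exchange (question and answer invented for illustration):

```python
# Illustrative only: the message-list equivalent of one ChatML turn.
example_conversation = [
    {"role": SYSTEM, "content": system_message_chat_conversation},
    {"role": USER, "content": "Does my plan cover annual eye exams?"},
    {"role": ASSISTANT, "content": "Yes, one routine exam per year is covered [benefits.pdf]."},
]
```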
79 | 67 | "summary_prompt_template = \"\"\"Below is a summary of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base. Generate a search query based on the conversation and the new question. Source names are not good search terms to include in the search query.\n",
80 | 68 | "\n",

85 | 73 | "{question}\n",
86 | 74 | "\n",
87 | 75 | "Search query:\n",
88 | | - "\"\"\""
| 76 | + "\"\"\"\n"
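To make the template concrete, a sketch of the prompt the query step below produces (the summary and question values are invented examples):

```python
# Illustrative only: filling the template as the cell below does.
example_prompt = summary_prompt_template.format(
    summary=str([{"role": "user", "content": "Does my plan cover eye exams?"}]),
    question="What about dental cleanings?",
)
# With max_tokens=32 and stop=["\n"], the completion comes back as a
# short single-line search string, e.g. "dental cleaning coverage".
```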
89 | 77 | ]
90 | 78 | },
91 | 79 | {

100 | 88 | "# Exclude category, to simulate scenarios where there's a set of docs you can't see\n",
101 | 89 | "exclude_category = None\n",
102 | 90 | "\n",
103 | | - "if len(history) > 0:\n",
104 | | - "    completion = openai.Completion.create(\n",
| 91 | + "if len(chat_conversations) > 1:\n",
| 92 | + "    query_completion = openai.Completion.create(\n",
105 | 93 | "        engine=AZURE_OPENAI_GPT_DEPLOYMENT,\n",
106 | | - "        prompt=summary_prompt_template.format(summary=\"\\n\".join(history), question=user_input),\n",
| 94 | + "        prompt=summary_prompt_template.format(summary=str(chat_conversations), question=user_input),\n",
107 | 95 | "        temperature=0.7,\n",
108 | 96 | "        max_tokens=32,\n",
109 | 97 | "        stop=[\"\\n\"])\n",
110 | | - "    search = completion.choices[0].text\n",
| 98 | + "    search = query_completion.choices[0].text\n",
111 | 99 | "else:\n",
112 | 100 | "    search = user_input\n",
113 | 101 | "\n",
125 | 113 | "results = [doc[KB_FIELDS_SOURCEPAGE] + \": \" + doc[KB_FIELDS_CONTENT].replace(\"\\n\", \"\").replace(\"\\r\", \"\") for doc in r]\n",
126 | 114 | "content = \"\\n\".join(results)\n",
127 | 115 | "\n",
128 | | - "prompt = prompt_prefix.format(sources=content) + prompt_history + user_input + turn_suffix\n",
| 116 | + "user_content = user_input + \" \\nSOURCES:\\n\" + content\n",
129 | 117 | "\n",
130 | | - "completion = openai.Completion.create(\n",
131 | | - "    engine=AZURE_OPENAI_CHATGPT_DEPLOYMENT, \n",
132 | | - "    prompt=prompt, \n",
133 | | - "    temperature=0.7, \n",
134 | | - "    max_tokens=1024,\n",
135 | | - "    stop=[\"<|im_end|>\", \"<|im_start|>\"])\n",
| 118 | + "chat_conversations.append({\"role\": USER, \"content\": user_content })\n",
136 | 119 | "\n",
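The appended turn carries the full SOURCES block, so it dominates the prompt's token budget; that is what the reset a few lines below compensates for. A hedged way to see the difference (tiktoken is not used in this notebook, shown only for illustration):

```python
# Illustrative only: compare the sources-laden turn to the bare question.
import tiktoken

enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
print(len(enc.encode(user_content)), "vs", len(enc.encode(user_input)))
```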
137 | | - "prompt_history += user_input + turn_suffix + completion.choices[0].text + \"\\n<|im_end|>\" + turn_prefix\n",
138 | | - "history.append(\"user: \" + user_input)\n",
139 | | - "history.append(\"assistant: \" + completion.choices[0].text)\n",
140 | | - "\n",
141 | | - "print(\"\\n-------------------\\n\".join(history))\n",
142 | | - "print(\"\\n-------------------\\nPrompt:\\n\" + prompt)"
| 120 | + "chat_completion = openai.ChatCompletion.create(\n",
| 121 | + "    deployment_id=AZURE_OPENAI_CHATGPT_DEPLOYMENT,\n",
| 122 | + "    model=AZURE_OPENAI_CHATGPT_MODEL,\n",
| 123 | + "    messages=chat_conversations, \n",
| 124 | + "    temperature=0.7, \n",
| 125 | + "    max_tokens=1024, \n",
| 126 | + "    n=1)\n",
| 127 | + "chat_content = chat_completion.choices[0].message.content\n",
| 128 | + "'''\n",
| 129 | + "reset user content to avoid sources in conversation history\n",
| 130 | + "add source as a single shot in query conversation\n",
| 131 | + "'''\n",
| 132 | + "chat_conversations[-1][\"content\"] = user_input\n",
| 133 | + "chat_conversations.append({\"role\":ASSISTANT, \"content\": chat_content})\n",
| 134 | + "\n",
| 135 | + "print(\"\\n-------------------\\n\")\n",
| 136 | + "[print(conversation) for conversation in chat_conversations]"
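A sketch of what `chat_conversations` holds after the cell runs once (contents invented): the stored user turn keeps only the question, because the SOURCES block was swapped out right after the call and so never accumulates across turns.

```python
# Illustrative only: history after one turn, post-reset.
[
    {"role": "system", "content": system_message_chat_conversation},
    {"role": "user", "content": "Does my plan cover eye exams?"},  # sources stripped
    {"role": "assistant", "content": "Yes, one routine exam per year [benefits.pdf]."},
]
```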
143 | 137 | ]
144 | 138 | }
145 | 139 | ],

159 | 153 | "name": "python",
160 | 154 | "nbconvert_exporter": "python",
161 | 155 | "pygments_lexer": "ipython3",
162 | | - "version": "3.10.10"
| 156 | + "version": "3.11.4"
163 | 157 | },
164 | 158 | "orig_nbformat": 4,
165 | 159 | "vscode": {