s

srbalakr · srbalakr · commit 8a34a97fa6c0 · 2023-07-10T18:39:50.000-07:00
diff --git a/notebooks/chat-read-retrieve-read.ipynb b/notebooks/chat-read-retrieve-read.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -20,10 +20,7 @@
     "AZURE_OPENAI_SERVICE = os.environ.get(\"AZURE_OPENAI_SERVICE\") or \"myopenai\"\n",
     "AZURE_OPENAI_GPT_DEPLOYMENT = os.environ.get(\"AZURE_OPENAI_GPT_DEPLOYMENT\") or \"davinci\"\n",
     "AZURE_OPENAI_CHATGPT_DEPLOYMENT = os.environ.get(\"AZURE_OPENAI_CHATGPT_DEPLOYMENT\") or \"chat\"\n",
-    "\n",
-    "KB_FIELDS_CONTENT = os.environ.get(\"KB_FIELDS_CONTENT\") or \"content\"\n",
-    "KB_FIELDS_CATEGORY = os.environ.get(\"KB_FIELDS_CATEGORY\") or \"category\"\n",
-    "KB_FIELDS_SOURCEPAGE = os.environ.get(\"KB_FIELDS_SOURCEPAGE\") or \"sourcepage\"\n",
+    "AZURE_OPENAI_CHATGPT_MODEL = os.environ.get(\"AZURE_OPENAI_CHATGPT_MODEL\") or \"gpt-35-turbo\"\n",
     "\n",
     "# Use the current user identity to authenticate with Azure OpenAI, Cognitive Search and Blob Storage (no secrets needed, \n",
     "# just use 'az login' locally, and managed identity when deployed on Azure). If you need to use keys, use separate AzureKeyCredential instances with the \n",
@@ -33,7 +30,7 @@
     "# Used by the OpenAI SDK\n",
     "openai.api_type = \"azure\"\n",
     "openai.api_base = f\"https://{AZURE_OPENAI_SERVICE}.openai.azure.com\"\n",
-    "openai.api_version = \"2022-12-01\"\n",
+    "openai.api_version = \"2023-05-15\"\n",
     "\n",
     "# Comment these two lines out if using keys, set your API key in the OPENAI_API_KEY environment variable instead\n",
     "openai.api_type = \"azure_ad\"\n",
@@ -48,33 +45,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ChatGPT uses a particular set of tokens to indicate turns in conversations\n",
-    "prompt_prefix = \"\"\"<|im_start|>system\n",
-    "Assistant helps the company employees with their healthcare plan questions and employee handbook questions. \n",
-    "Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question. \n",
-    "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brakets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\n",
-    "\n",
-    "Sources:\n",
-    "{sources}\n",
-    "\n",
-    "<|im_end|>\"\"\"\n",
-    "\n",
-    "turn_prefix = \"\"\"\n",
-    "<|im_start|>user\n",
-    "\"\"\"\n",
-    "\n",
-    "turn_suffix = \"\"\"\n",
-    "<|im_end|>\n",
-    "<|im_start|>assistant\n",
+    "# Chat roles\n",
+    "SYSTEM = \"system\"\n",
+    "USER = \"user\"\n",
+    "ASSISTANT = \"assistant\"\n",
+    "\n",
+    "system_message_chat_conversation = \"\"\"Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\n",
+    "Answer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\n",
+    "Each source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\n",
     "\"\"\"\n",
-    "\n",
-    "prompt_history = turn_prefix\n",
-    "\n",
-    "history = []\n",
+    "chat_conversations = [{\"role\" : SYSTEM, \"content\" : system_message_chat_conversation}]\n",
     "\n",
     "summary_prompt_template = \"\"\"Below is a summary of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base. Generate a search query based on the conversation and the new question. Source names are not good search terms to include in the search query.\n",
     "\n",
@@ -85,29 +69,64 @@
     "{question}\n",
     "\n",
     "Search query:\n",
-    "\"\"\""
+    "\"\"\"\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Searching: \"copay\" \"Northwind Health Plus\" \"Northwind Standard\" benefits details -source name\n",
+      "-------------------\n",
+      "\n",
+      "-------------------\n",
+      "\n",
+      "{'role': 'system', 'content': \"Assistant helps the company employees with their healthcare plan questions, and questions about the employee handbook. Be brief in your answers.\\nAnswer ONLY with the facts listed in the list of sources below. If there isn't enough information below, say you don't know. Do not generate answers that don't use the sources below. If asking a clarifying question to the user would help, ask the question.\\nEach source has a name followed by colon and the actual information, always include the source name for each fact you use in the response. Use square brackets to reference the source, e.g. [info1.txt]. Don't combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\\n\"}\n",
+      "{'role': 'user', 'content': 'does my plan cover eye exam'}\n",
+      "{'role': 'assistant', 'content': 'Yes, Northwind Health Plus plan covers vision exams, glasses, and contact lenses. However, Northwind Standard only covers vision and hearing screenings and certain preventive services such as routine physical exams, immunizations, and screenings for certain diseases such as cancer and diabetes. [Benefit_Options-2.pdf][Northwind_Standard_Benefits_Details-44.pdf]'}\n",
+      "{'role': 'user', 'content': 'does my plan cover eye exam'}\n",
+      "{'role': 'assistant', 'content': 'Yes, Northwind Health Plus plan covers vision exams, glasses, and contact lenses. [Benefit_Options-2.pdf][Northwind_Health_Plus_Benefits_Details-25.pdf]'}\n",
+      "{'role': 'user', 'content': 'does my plan cover vision'}\n",
+      "{'role': 'assistant', 'content': 'Yes, both Northwind Health Plus and Northwind Standard plans offer coverage for vision services. Northwind Health Plus plan offers coverage for vision exams, glasses, and contact lenses, whereas Northwind Standard plan only offers coverage for vision exams and glasses. [Benefit_Options-2.pdf][Northwind_Standard_Benefits_Details-97.pdf]'}\n",
+      "{'role': 'user', 'content': 'does my plan cover cardio'}\n",
+      "{'role': 'assistant', 'content': 'Unfortunately, I cannot find information about whether Northwind Health Plus or Northwind Standard plans cover cardio specifically. However, both plans offer coverage for medical services like hospital stays and doctor visits. [Benefit_Options-2.pdf]'}\n",
+      "{'role': 'user', 'content': 'does my plan cover dental'}\n",
+      "{'role': 'assistant', 'content': 'Yes, Northwind Standard plan offers coverage for dental services. This coverage includes medically necessary services for the relief of pain resulting from dental injury, as well as services for the administration of anesthesia in a facility. However, this plan does not offer coverage for emergency services, mental health and substance abuse coverage, or out-of-network services. [Benefit_Options-2.pdf][Northwind_Standard_Benefits_Details-23.pdf]'}\n",
+      "{'role': 'user', 'content': 'how much is copay'}\n",
+      "{'role': 'assistant', 'content': 'The amount of copay for Northwind Standard plan varies depending on the type of service you receive, and copays may also vary from provider to provider and from state to state. Northwind Health Plus plan also has copays for certain services, and the amount of copayment varies based on the type of service received, whether the service is rendered by an in-network or out-of-network provider, and other factors. For in-network services, copays for Northwind Health Plus plan are generally lower than for out-of-network services. The copayment for primary care visits is typically around $20, while specialist visits have a copayment of around $50. [Northwind_Standard_Benefits_Details-9.pdf][Northwind_Standard_Benefits_Details-10.pdf][Northwind_Health_Plus_Benefits_Details-9.pdf]'}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[None, None, None, None, None, None, None, None, None, None, None, None, None]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Execute this cell multiple times updating user_input to accumulate chat history\n",
-    "user_input = \"Does my plan cover annual eye exams?\"\n",
+    "user_input = \"how much is copay\"\n",
     "\n",
     "# Exclude category, to simulate scenarios where there's a set of docs you can't see\n",
     "exclude_category = None\n",
     "\n",
-    "if len(history) > 0:\n",
-    "    completion = openai.Completion.create(\n",
+    "if len(chat_conversations) > 1:\n",
+    "    query_completion = openai.Completion.create(\n",
     "        engine=AZURE_OPENAI_GPT_DEPLOYMENT,\n",
-    "        prompt=summary_prompt_template.format(summary=\"\\n\".join(history), question=user_input),\n",
+    "        prompt=summary_prompt_template.format(summary=str(chat_conversations), question=user_input),\n",
     "        temperature=0.7,\n",
     "        max_tokens=32,\n",
     "        stop=[\"\\n\"])\n",
-    "    search = completion.choices[0].text\n",
+    "    search = query_completion.choices[0].text\n",
     "else:\n",
     "    search = user_input\n",
     "\n",
@@ -125,21 +144,27 @@
     "results = [doc[KB_FIELDS_SOURCEPAGE] + \": \" + doc[KB_FIELDS_CONTENT].replace(\"\\n\", \"\").replace(\"\\r\", \"\") for doc in r]\n",
     "content = \"\\n\".join(results)\n",
     "\n",
-    "prompt = prompt_prefix.format(sources=content) + prompt_history + user_input + turn_suffix\n",
+    "user_content = user_input + \" \\nSOURCES:\\n\" + content\n",
     "\n",
-    "completion = openai.Completion.create(\n",
-    "    engine=AZURE_OPENAI_CHATGPT_DEPLOYMENT, \n",
-    "    prompt=prompt, \n",
-    "    temperature=0.7, \n",
-    "    max_tokens=1024,\n",
-    "    stop=[\"<|im_end|>\", \"<|im_start|>\"])\n",
-    "\n",
-    "prompt_history += user_input + turn_suffix + completion.choices[0].text + \"\\n<|im_end|>\" + turn_prefix\n",
-    "history.append(\"user: \" + user_input)\n",
-    "history.append(\"assistant: \" + completion.choices[0].text)\n",
+    "chat_conversations.append({\"role\": USER, \"content\": user_content })\n",
     "\n",
-    "print(\"\\n-------------------\\n\".join(history))\n",
-    "print(\"\\n-------------------\\nPrompt:\\n\" + prompt)"
+    "chat_completion = openai.ChatCompletion.create(\n",
+    "    deployment_id=AZURE_OPENAI_CHATGPT_DEPLOYMENT,\n",
+    "    model=AZURE_OPENAI_CHATGPT_MODEL,\n",
+    "    messages=chat_conversations, \n",
+    "    temperature=0.7, \n",
+    "    max_tokens=1024, \n",
+    "    n=1)\n",
+    "chat_content = chat_completion.choices[0].message.content\n",
+    "# Keep only the bare question in the history; the retrieved sources are sent\n",
+    "# with the current turn only, so they are not replayed on later turns.\n",
+    "chat_conversations[-1][\"content\"] = user_input\n",
+    "chat_conversations.append({\"role\":ASSISTANT, \"content\": chat_content})\n",
+    "\n",
+    "print(\"\\n-------------------\\n\")\n",
+    "# One print call, one message per line: a comprehension used only for its side\n",
+    "# effects would make the cell echo a list of None values as its result.\n",
+    "print(*chat_conversations, sep=\"\\n\")"
    ]
   }
  ],
@@ -159,7 +184,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.11.4"
   },
   "orig_nbformat": 4,
   "vscode": {