Skip to content

Commit 7b7e6ce

Browse files
srbalak and pamelafox authored
Stabilize search query generation (#652)
* Use function call to stabilize search query * minor changes * sort imports * fix ruff * s * Update app/backend/approaches/chatreadretrieveread.py Co-authored-by: Pamela Fox <[email protected]> * s * blacj format * add test * save --------- Co-authored-by: Pamela Fox <[email protected]>
1 parent 595578c commit 7b7e6ce

File tree

3 files changed

+67
-8
lines changed

3 files changed

+67
-8
lines changed

app/backend/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ async def setup_clients():
193193
if OPENAI_HOST == "azure":
194194
openai.api_type = "azure_ad"
195195
openai.api_base = f"https://{AZURE_OPENAI_SERVICE}.openai.azure.com"
196-
openai.api_version = "2023-05-15"
196+
openai.api_version = "2023-07-01-preview"
197197
openai_token = await azure_credential.get_token("https://cognitiveservices.azure.com/.default")
198198
openai.api_key = openai_token.token
199199
# Store on app.config for later use inside requests

app/backend/approaches/chatreadretrieveread.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from typing import Any, AsyncGenerator
23

34
import openai
@@ -15,6 +16,8 @@ class ChatReadRetrieveReadApproach:
1516
USER = "user"
1617
ASSISTANT = "assistant"
1718

19+
NO_RESPONSE = "0"
20+
1821
"""
1922
Simple retrieve-then-read implementation, using the Cognitive Search and OpenAI APIs directly. It first retrieves
2023
top documents from search, then constructs a prompt with them, and then uses OpenAI to generate an completion
@@ -33,6 +36,7 @@ class ChatReadRetrieveReadApproach:
3336
Only generate questions and do not generate any text before or after the questions, such as 'Next Questions'"""
3437

3538
query_prompt_template = """Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching in a knowledge base about employee healthcare plans and the employee handbook.
39+
You have access to Azure Cognitive Search index with 100's of documents.
3640
Generate a search query based on the conversation and the new question.
3741
Do not include cited source filenames and document names e.g info.txt or doc.pdf in the search query terms.
3842
Do not include any text inside [] or <<>> in the search query terms.
@@ -78,16 +82,33 @@ async def run_until_final_call(
7882
exclude_category = overrides.get("exclude_category") or None
7983
filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
8084

81-
user_q = "Generate search query for: " + history[-1]["user"]
85+
user_query_request = "Generate search query for: " + history[-1]["user"]
86+
87+
functions = [
88+
{
89+
"name": "search_sources",
90+
"description": "Retrieve sources from the Azure Cognitive Search index",
91+
"parameters": {
92+
"type": "object",
93+
"properties": {
94+
"search_query": {
95+
"type": "string",
96+
"description": "Query string to retrieve documents from azure search eg: 'Health care plan'",
97+
}
98+
},
99+
"required": ["search_query"],
100+
},
101+
}
102+
]
82103

83104
# STEP 1: Generate an optimized keyword search query based on the chat history and the last question
84105
messages = self.get_messages_from_history(
85106
self.query_prompt_template,
86107
self.chatgpt_model,
87108
history,
88-
user_q,
109+
user_query_request,
89110
self.query_prompt_few_shots,
90-
self.chatgpt_token_limit - len(user_q),
111+
self.chatgpt_token_limit - len(user_query_request),
91112
)
92113

93114
chatgpt_args = {"deployment_id": self.chatgpt_deployment} if self.openai_host == "azure" else {}
@@ -98,11 +119,11 @@ async def run_until_final_call(
98119
temperature=0.0,
99120
max_tokens=32,
100121
n=1,
122+
functions=functions,
123+
function_call="auto",
101124
)
102125

103-
query_text = chat_completion.choices[0].message.content
104-
if query_text.strip() == "0":
105-
query_text = history[-1]["user"] # Use the last user input if we failed to generate a better query
126+
query_text = self.get_search_query(chat_completion, history[-1]["user"])
106127

107128
# STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
108129

@@ -186,6 +207,7 @@ async def run_until_final_call(
186207
"thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>"
187208
+ msg_to_display.replace("\n", "<br>"),
188209
}
210+
189211
chat_coroutine = openai.ChatCompletion.acreate(
190212
**chatgpt_args,
191213
model=self.chatgpt_model,
@@ -199,7 +221,8 @@ async def run_until_final_call(
199221

200222
async def run_without_streaming(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> dict[str, Any]:
201223
extra_info, chat_coroutine = await self.run_until_final_call(history, overrides, should_stream=False)
202-
chat_content = (await chat_coroutine).choices[0].message.content
224+
chat_resp = await chat_coroutine
225+
chat_content = chat_resp.choices[0].message.content
203226
extra_info["answer"] = chat_content
204227
return extra_info
205228

@@ -242,3 +265,16 @@ def get_messages_from_history(
242265

243266
messages = message_builder.messages
244267
return messages
268+
269+
def get_search_query(self, chat_completion: dict[str, any], user_query: str):
270+
response_message = chat_completion["choices"][0]["message"]
271+
if function_call := response_message.get("function_call"):
272+
if function_call["name"] == "search_sources":
273+
arg = json.loads(function_call["arguments"])
274+
search_query = arg.get("search_query", self.NO_RESPONSE)
275+
if search_query != self.NO_RESPONSE:
276+
return search_query
277+
elif query_text := response_message.get("content"):
278+
if query_text.strip() != self.NO_RESPONSE:
279+
return query_text
280+
return user_query

tests/test_chatapproach.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import json
2+
3+
from approaches.chatreadretrieveread import ChatReadRetrieveReadApproach
4+
5+
6+
def test_get_search_query():
7+
chat_approach = ChatReadRetrieveReadApproach(None, "", "gpt-35-turbo", "gpt-35-turbo", "", "", "", "")
8+
9+
payload = '{"id":"chatcmpl-81JkxYqYppUkPtOAia40gki2vJ9QM","object":"chat.completion","created":1695324963,"model":"gpt-35-turbo","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"choices":[{"index":0,"finish_reason":"function_call","message":{"role":"assistant","function_call":{"name":"search_sources","arguments":"{\\n\\"search_query\\":\\"accesstelemedicineservices\\"\\n}"}},"content_filter_results":{}}],"usage":{"completion_tokens":19,"prompt_tokens":425,"total_tokens":444}}'
10+
default_query = "hello"
11+
query = chat_approach.get_search_query(json.loads(payload), default_query)
12+
13+
assert query == "accesstelemedicineservices"
14+
15+
16+
def test_get_search_query_returns_default():
17+
chat_approach = ChatReadRetrieveReadApproach(None, "", "gpt-35-turbo", "gpt-35-turbo", "", "", "", "")
18+
19+
payload = '{"id":"chatcmpl-81JkxYqYppUkPtOAia40gki2vJ9QM","object":"chat.completion","created":1695324963,"model":"gpt-35-turbo","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}],"choices":[{"index":0,"finish_reason":"function_call","message":{"role":"assistant"},"content_filter_results":{}}],"usage":{"completion_tokens":19,"prompt_tokens":425,"total_tokens":444}}'
20+
default_query = "hello"
21+
query = chat_approach.get_search_query(json.loads(payload), default_query)
22+
23+
assert query == default_query

0 commit comments

Comments (0)