@@ -41,13 +41,21 @@ class ChatReadRetrieveReadApproach:
 If you cannot generate a search query, return just the number 0.
 """
     query_prompt_few_shots = [
-        {'role' : USER, 'content' : 'What are my health plans?'},
-        {'role' : ASSISTANT, 'content' : 'Show available health plans'},
-        {'role' : USER, 'content' : 'does my plan cover cardio?'},
-        {'role' : ASSISTANT, 'content' : 'Health plan cardio coverage'}
+        {"role": USER, "content": "What are my health plans?"},
+        {"role": ASSISTANT, "content": "Show available health plans"},
+        {"role": USER, "content": "does my plan cover cardio?"},
+        {"role": ASSISTANT, "content": "Health plan cardio coverage"},
     ]
 
-    def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt_model: str, embedding_deployment: str, sourcepage_field: str, content_field: str):
+    def __init__(
+        self,
+        search_client: SearchClient,
+        chatgpt_deployment: str,
+        chatgpt_model: str,
+        embedding_deployment: str,
+        sourcepage_field: str,
+        content_field: str,
+    ):
         self.search_client = search_client
         self.chatgpt_deployment = chatgpt_deployment
         self.chatgpt_model = chatgpt_model
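
Note: the few-shot turns reformatted above are the examples that run_until_final_call later feeds to the query-generation request. Purely as an illustration (this is not code from the diff; the system prompt and the latest question are abbreviated placeholders), the messages they produce are shaped roughly like this:

query_generation_messages = [
    {"role": "system", "content": "<query_prompt_template, ending with: If you cannot generate a search query, return just the number 0.>"},
    {"role": "user", "content": "What are my health plans?"},
    {"role": "assistant", "content": "Show available health plans"},
    {"role": "user", "content": "does my plan cover cardio?"},
    {"role": "assistant", "content": "Health plan cardio coverage"},
    {"role": "user", "content": "Generate search query for: <latest user question>"},
]
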
@@ -56,15 +64,17 @@ def __init__(self, search_client: SearchClient, chatgpt_deployment: str, chatgpt
         self.content_field = content_field
         self.chatgpt_token_limit = get_token_limit(chatgpt_model)
 
-    async def run_until_final_call(self, history: list[dict[str, str]], overrides: dict[str, Any], should_stream: bool = False) -> tuple:
+    async def run_until_final_call(
+        self, history: list[dict[str, str]], overrides: dict[str, Any], should_stream: bool = False
+    ) -> tuple:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
         top = overrides.get("top") or 3
         exclude_category = overrides.get("exclude_category") or None
         filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
 
-        user_q = 'Generate search query for: ' + history[-1]["user"]
+        user_q = "Generate search query for: " + history[-1]["user"]
 
         # STEP 1: Generate an optimized keyword search query based on the chat history and the last question
         messages = self.get_messages_from_history(
@@ -73,89 +83,112 @@ async def run_until_final_call(self, history: list[dict[str, str]], overrides: d
             history,
             user_q,
             self.query_prompt_few_shots,
-            self.chatgpt_token_limit - len(user_q)
-            )
+            self.chatgpt_token_limit - len(user_q),
+        )
 
         chat_completion = await openai.ChatCompletion.acreate(
             deployment_id=self.chatgpt_deployment,
             model=self.chatgpt_model,
             messages=messages,
             temperature=0.0,
             max_tokens=32,
-            n=1)
+            n=1,
+        )
 
         query_text = chat_completion.choices[0].message.content
         if query_text.strip() == "0":
-            query_text = history[-1]["user"] # Use the last user input if we failed to generate a better query
+            query_text = history[-1]["user"]  # Use the last user input if we failed to generate a better query
 
         # STEP 2: Retrieve relevant documents from the search index with the GPT optimized query
 
         # If retrieval mode includes vectors, compute an embedding for the query
         if has_vector:
-            query_vector = (await openai.Embedding.acreate(engine=self.embedding_deployment, input=query_text))["data"][0]["embedding"]
+            embedding = await openai.Embedding.acreate(engine=self.embedding_deployment, input=query_text)
+            query_vector = embedding["data"][0]["embedding"]
         else:
             query_vector = None
 
-        # Only keep the text query if the retrieval mode uses text, otherwise drop it
+        # Only keep the text query if the retrieval mode uses text, otherwise drop it
         if not has_text:
             query_text = None
 
         # Use semantic L2 reranker if requested and if retrieval mode is text or hybrid (vectors + text)
         if overrides.get("semantic_ranker") and has_text:
-            r = await self.search_client.search(query_text,
-                                                filter=filter,
-                                                query_type=QueryType.SEMANTIC,
-                                                query_language="en-us",
-                                                query_speller="lexicon",
-                                                semantic_configuration_name="default",
-                                                top=top,
-                                                query_caption="extractive|highlight-false" if use_semantic_captions else None,
-                                                vector=query_vector,
-                                                top_k=50 if query_vector else None,
-                                                vector_fields="embedding" if query_vector else None)
+            r = await self.search_client.search(
+                query_text,
+                filter=filter,
+                query_type=QueryType.SEMANTIC,
+                query_language="en-us",
+                query_speller="lexicon",
+                semantic_configuration_name="default",
+                top=top,
+                query_caption="extractive|highlight-false" if use_semantic_captions else None,
+                vector=query_vector,
+                top_k=50 if query_vector else None,
+                vector_fields="embedding" if query_vector else None,
+            )
         else:
-            r = await self.search_client.search(query_text,
-                                                filter=filter,
-                                                top=top,
-                                                vector=query_vector,
-                                                top_k=50 if query_vector else None,
-                                                vector_fields="embedding" if query_vector else None)
+            r = await self.search_client.search(
+                query_text,
+                filter=filter,
+                top=top,
+                vector=query_vector,
+                top_k=50 if query_vector else None,
+                vector_fields="embedding" if query_vector else None,
+            )
         if use_semantic_captions:
-            results = [doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc['@search.captions']])) async for doc in r]
+            results = [
+                doc[self.sourcepage_field] + ": " + nonewlines(" . ".join([c.text for c in doc["@search.captions"]]))
+                async for doc in r
+            ]
         else:
             results = [doc[self.sourcepage_field] + ": " + nonewlines(doc[self.content_field]) async for doc in r]
         content = "\n".join(results)
 
-        follow_up_questions_prompt = self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
+        follow_up_questions_prompt = (
+            self.follow_up_questions_prompt_content if overrides.get("suggest_followup_questions") else ""
+        )
 
         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
 
         # Allow client to replace the entire prompt, or to inject into the exiting prompt using >>>
         prompt_override = overrides.get("prompt_template")
         if prompt_override is None:
-            system_message = self.system_message_chat_conversation.format(injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt)
+            system_message = self.system_message_chat_conversation.format(
+                injected_prompt="", follow_up_questions_prompt=follow_up_questions_prompt
+            )
         elif prompt_override.startswith(">>>"):
-            system_message = self.system_message_chat_conversation.format(injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt)
+            system_message = self.system_message_chat_conversation.format(
+                injected_prompt=prompt_override[3:] + "\n", follow_up_questions_prompt=follow_up_questions_prompt
+            )
         else:
             system_message = prompt_override.format(follow_up_questions_prompt=follow_up_questions_prompt)
 
         messages = self.get_messages_from_history(
             system_message,
             self.chatgpt_model,
             history,
-            history[-1]["user"]+ "\n\nSources:\n" + content, # Model does not handle lengthy system messages well. Moving sources to latest user conversation to solve follow up questions prompt.
-            max_tokens=self.chatgpt_token_limit)
-        msg_to_display = '\n\n'.join([str(message) for message in messages])
-
-        extra_info = {"data_points": results, "thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>" + msg_to_display.replace('\n', '<br>')}
+            # Model does not handle lengthy system messages well.
+            # Moved sources to latest user conversation to solve follow up questions prompt.
+            history[-1]["user"] + "\n\nSources:\n" + content,
+            max_tokens=self.chatgpt_token_limit,
+        )
+        msg_to_display = "\n\n".join([str(message) for message in messages])
+
+        extra_info = {
+            "data_points": results,
+            "thoughts": f"Searched for:<br>{query_text}<br><br>Conversations:<br>"
+            + msg_to_display.replace("\n", "<br>"),
+        }
         chat_coroutine = openai.ChatCompletion.acreate(
-            deployment_id=self.chatgpt_deployment,
-            model=self.chatgpt_model,
-            messages=messages,
-            temperature=overrides.get("temperature") or 0.7,
-            max_tokens=1024,
-            n=1,
-            stream=should_stream)
+            deployment_id=self.chatgpt_deployment,
+            model=self.chatgpt_model,
+            messages=messages,
+            temperature=overrides.get("temperature") or 0.7,
+            max_tokens=1024,
+            n=1,
+            stream=should_stream,
+        )
         return (extra_info, chat_coroutine)
 
     async def run_without_streaming(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> dict[str, Any]:
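
Note: the filter passed to both search_client.search calls in this hunk is built earlier by the unchanged line filter = "category ne '{}'".format(exclude_category.replace("'", "''")), which escapes single quotes in the OData expression by doubling them. A minimal sketch of what that line produces (the category value here is invented for illustration):

exclude_category = "Bob's documents"
filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
print(filter)  # category ne 'Bob''s documents'
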
@@ -164,19 +197,29 @@ async def run_without_streaming(self, history: list[dict[str, str]], overrides:
         extra_info["answer"] = chat_content
         return extra_info
 
-    async def run_with_streaming(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> AsyncGenerator[dict, None]:
+    async def run_with_streaming(
+        self, history: list[dict[str, str]], overrides: dict[str, Any]
+    ) -> AsyncGenerator[dict, None]:
         extra_info, chat_coroutine = await self.run_until_final_call(history, overrides, should_stream=True)
         yield extra_info
         async for event in await chat_coroutine:
             yield event
 
-
-    def get_messages_from_history(self, system_prompt: str, model_id: str, history: list[dict[str, str]], user_conv: str, few_shots=[], max_tokens: int = 4096) -> list:
+    def get_messages_from_history(
+        self,
+        system_prompt: str,
+        model_id: str,
+        history: list[dict[str, str]],
+        user_conv: str,
+        few_shots=[],
+        max_tokens: int = 4096,
+    ) -> list:
         message_builder = MessageBuilder(system_prompt, model_id)
 
-        # Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
+        # Add examples to show the chat what responses we want.
+        # It will try to mimic any responses and make sure they match the rules laid out in the system message.
         for shot in few_shots:
-            message_builder.append_message(shot.get('role'), shot.get('content'))
+            message_builder.append_message(shot.get("role"), shot.get("content"))
 
         user_content = user_conv
         append_index = len(few_shots) + 1
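
Side note, outside the scope of this formatting-only change: the reformatted get_messages_from_history signature keeps the mutable default few_shots=[]. Leaving it untouched here is expected, but if the signature were ever revisited, a common alternative is a None default; the sketch below is purely illustrative and uses a hypothetical function name:

def example_with_safe_default(few_shots=None) -> list:
    # A None default avoids sharing one list object across every call.
    return [] if few_shots is None else few_shots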