Azure-Samples
diff --git a/‎app/hrchatbot/backend/app.py‎
Lines changed: 245 additions & 94 deletions b/‎app/hrchatbot/backend/app.py‎
Lines changed: 245 additions & 94 deletions
diff --git a/‎app/hrchatbot/backend/approaches/approach.py‎
Lines changed: 76 additions & 24 deletions b/‎app/hrchatbot/backend/approaches/approach.py‎
Lines changed: 76 additions & 24 deletions
diff --git a/‎app/hrchatbot/backend/approaches/chatapproach.py‎
Lines changed: 40 additions & 11 deletions b/‎app/hrchatbot/backend/approaches/chatapproach.py‎
Lines changed: 40 additions & 11 deletions
@@ -116,7 +116,9 @@ def from_completion_usage(cls, usage: CompletionUsage) -> "TokenUsageProps":
             prompt_tokens=usage.prompt_tokens,
             completion_tokens=usage.completion_tokens,
             reasoning_tokens=(
-                usage.completion_tokens_details.reasoning_tokens if usage.completion_tokens_details else None
+                usage.completion_tokens_details.reasoning_tokens
+                if usage.completion_tokens_details
+                else None
             ),
             total_tokens=usage.total_tokens,
         )
@@ -148,7 +150,9 @@ def __init__(
         auth_helper: AuthenticationHelper,
         query_language: Optional[str],
         query_speller: Optional[str],
-        embedding_deployment: Optional[str],  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
+        embedding_deployment: Optional[
+            str
+        ],  # Not needed for non-Azure OpenAI or for retrieval_mode="text"
         embedding_model: str,
         embedding_dimensions: int,
         embedding_field: str,
@@ -174,15 +178,23 @@ def __init__(
         self.reasoning_effort = reasoning_effort
         self.include_token_usage = True
 
-    def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]:
+    def build_filter(
+        self, overrides: dict[str, Any], auth_claims: dict[str, Any]
+    ) -> Optional[str]:
         include_category = overrides.get("include_category")
         exclude_category = overrides.get("exclude_category")
-        security_filter = self.auth_helper.build_security_filters(overrides, auth_claims)
+        security_filter = self.auth_helper.build_security_filters(
+            overrides, auth_claims
+        )
         filters = []
         if include_category:
-            filters.append("category eq '{}'".format(include_category.replace("'", "''")))
+            filters.append(
+                "category eq '{}'".format(include_category.replace("'", "''"))
+            )
         if exclude_category:
-            filters.append("category ne '{}'".format(exclude_category.replace("'", "''")))
+            filters.append(
+                "category ne '{}'".format(exclude_category.replace("'", "''"))
+            )
         if security_filter:
             filters.append(security_filter)
         return None if len(filters) == 0 else " and ".join(filters)
@@ -208,7 +220,9 @@ async def search(
                 search_text=search_text,
                 filter=filter,
                 top=top,
-                query_caption="extractive|highlight-false" if use_semantic_captions else None,
+                query_caption="extractive|highlight-false"
+                if use_semantic_captions
+                else None,
                 query_rewrites="generative" if use_query_rewriting else None,
                 vector_queries=search_vectors,
                 query_type=QueryType.SEMANTIC,
@@ -237,7 +251,9 @@ async def search(
                         sourcefile=document.get("sourcefile"),
                         oids=document.get("oids"),
                         groups=document.get("groups"),
-                        captions=cast(list[QueryCaptionResult], document.get("@search.captions")),
+                        captions=cast(
+                            list[QueryCaptionResult], document.get("@search.captions")
+                        ),
                         score=document.get("@search.score"),
                         reranker_score=document.get("@search.reranker_score"),
                     )
@@ -270,7 +286,10 @@ async def run_agentic_retrieval(
             retrieval_request=KnowledgeAgentRetrievalRequest(
                 messages=[
                     KnowledgeAgentMessage(
-                        role=str(msg["role"]), content=[KnowledgeAgentMessageTextContent(text=str(msg["content"]))]
+                        role=str(msg["role"]),
+                        content=[
+                            KnowledgeAgentMessageTextContent(text=str(msg["content"]))
+                        ],
                     )
                     for msg in messages
                     if msg["role"] != "system"
@@ -303,18 +322,25 @@ async def run_agentic_retrieval(
         if response and response.references:
             if results_merge_strategy == "interleaved":
                 # Use interleaved reference order
-                references = sorted(response.references, key=lambda reference: int(reference.id))
+                references = sorted(
+                    response.references, key=lambda reference: int(reference.id)
+                )
             else:
                 # Default to descending strategy
                 references = response.references
             for reference in references:
-                if isinstance(reference, KnowledgeAgentAzureSearchDocReference) and reference.source_data:
+                if (
+                    isinstance(reference, KnowledgeAgentAzureSearchDocReference)
+                    and reference.source_data
+                ):
                     results.append(
                         Document(
                             id=reference.doc_key,
                             content=reference.source_data["content"],
                             sourcepage=reference.source_data["sourcepage"],
-                            search_agent_query=activity_mapping[reference.activity_source],
+                            search_agent_query=activity_mapping[
+                                reference.activity_source
+                            ],
                         )
                     )
                 if top and len(results) == top:
@@ -323,22 +349,28 @@ async def run_agentic_retrieval(
         return response, results
 
     def get_sources_content(
-        self, results: list[Document], use_semantic_captions: bool, use_image_citation: bool
+        self,
+        results: list[Document],
+        use_semantic_captions: bool,
+        use_image_citation: bool,
     ) -> list[str]:
-
         def nonewlines(s: str) -> str:
             return s.replace("\n", " ").replace("\r", " ")
 
         if use_semantic_captions:
             return [
                 (self.get_citation((doc.sourcepage or ""), use_image_citation))
                 + ": "
-                + nonewlines(" . ".join([cast(str, c.text) for c in (doc.captions or [])]))
+                + nonewlines(
+                    " . ".join([cast(str, c.text) for c in (doc.captions or [])])
+                )
                 for doc in results
             ]
         else:
             return [
-                (self.get_citation((doc.sourcepage or ""), use_image_citation)) + ": " + nonewlines(doc.content or "")
+                (self.get_citation((doc.sourcepage or ""), use_image_citation))
+                + ": "
+                + nonewlines(doc.content or "")
                 for doc in results
             ]
 
@@ -365,21 +397,29 @@ class ExtraArgs(TypedDict, total=False):
             dimensions: int
 
         dimensions_args: ExtraArgs = (
-            {"dimensions": self.embedding_dimensions} if SUPPORTED_DIMENSIONS_MODEL[self.embedding_model] else {}
+            {"dimensions": self.embedding_dimensions}
+            if SUPPORTED_DIMENSIONS_MODEL[self.embedding_model]
+            else {}
         )
         embedding = await self.openai_client.embeddings.create(
             # Azure OpenAI takes the deployment name as the model name
-            model=self.embedding_deployment if self.embedding_deployment else self.embedding_model,
+            model=self.embedding_deployment
+            if self.embedding_deployment
+            else self.embedding_model,
             input=q,
             **dimensions_args,
         )
         query_vector = embedding.data[0].embedding
         # This performs an oversampling due to how the search index was setup,
         # so we do not need to explicitly pass in an oversampling parameter here
-        return VectorizedQuery(vector=query_vector, k_nearest_neighbors=50, fields=self.embedding_field)
+        return VectorizedQuery(
+            vector=query_vector, k_nearest_neighbors=50, fields=self.embedding_field
+        )
 
     async def compute_image_embedding(self, q: str):
-        endpoint = urljoin(self.vision_endpoint, "computervision/retrieval:vectorizeText")
+        endpoint = urljoin(
+            self.vision_endpoint, "computervision/retrieval:vectorizeText"
+        )
         headers = {"Content-Type": "application/json"}
         params = {"api-version": "2024-02-01", "model-version": "2023-04-15"}
         data = {"text": q}
@@ -388,13 +428,21 @@ async def compute_image_embedding(self, q: str):
 
         async with aiohttp.ClientSession() as session:
             async with session.post(
-                url=endpoint, params=params, headers=headers, json=data, raise_for_status=True
+                url=endpoint,
+                params=params,
+                headers=headers,
+                json=data,
+                raise_for_status=True,
             ) as response:
                 json = await response.json()
                 image_query_vector = json["vector"]
-        return VectorizedQuery(vector=image_query_vector, k_nearest_neighbors=50, fields="imageEmbedding")
+        return VectorizedQuery(
+            vector=image_query_vector, k_nearest_neighbors=50, fields="imageEmbedding"
+        )
 
-    def get_system_prompt_variables(self, override_prompt: Optional[str]) -> dict[str, str]:
+    def get_system_prompt_variables(
+        self, override_prompt: Optional[str]
+    ) -> dict[str, str]:
         # Allows client to replace the entire prompt, or to inject into the existing prompt using >>>
         if override_prompt is None:
             return {}
@@ -433,7 +481,11 @@ def create_chat_completion(
             if supported_features.streaming and should_stream:
                 params["stream"] = True
                 params["stream_options"] = {"include_usage": True}
-            params["reasoning_effort"] = reasoning_effort or overrides.get("reasoning_effort") or self.reasoning_effort
+            params["reasoning_effort"] = (
+                reasoning_effort
+                or overrides.get("reasoning_effort")
+                or self.reasoning_effort
+            )
 
         else:
             # Include parameters that may not be supported for reasoning models
 
@@ -18,13 +18,15 @@
 
 
 class ChatApproach(Approach, ABC):
-
     NO_RESPONSE = "0"
 
     @abstractmethod
     async def run_until_final_call(
         self, messages, overrides, auth_claims, should_stream
-    ) -> tuple[ExtraInfo, Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]]]:
+    ) -> tuple[
+        ExtraInfo,
+        Union[Awaitable[ChatCompletion], Awaitable[AsyncStream[ChatCompletionChunk]]],
+    ]:
         pass
 
     def get_search_query(self, chat_completion: ChatCompletion, user_query: str):
@@ -60,14 +62,20 @@ async def run_without_streaming(
         extra_info, chat_coroutine = await self.run_until_final_call(
             messages, overrides, auth_claims, should_stream=False
         )
-        chat_completion_response: ChatCompletion = await cast(Awaitable[ChatCompletion], chat_coroutine)
+        chat_completion_response: ChatCompletion = await cast(
+            Awaitable[ChatCompletion], chat_coroutine
+        )
         content = chat_completion_response.choices[0].message.content
         role = chat_completion_response.choices[0].message.role
         if overrides.get("suggest_followup_questions"):
             content, followup_questions = self.extract_followup_questions(content)
             extra_info.followup_questions = followup_questions
         # Assume last thought is for generating answer
-        if self.include_token_usage and extra_info.thoughts and chat_completion_response.usage:
+        if (
+            self.include_token_usage
+            and extra_info.thoughts
+            and chat_completion_response.usage
+        ):
             extra_info.thoughts[-1].update_token_usage(chat_completion_response.usage)
         chat_app_response = {
             "message": {"content": content, "role": role},
@@ -86,8 +94,14 @@ async def run_with_streaming(
         extra_info, chat_coroutine = await self.run_until_final_call(
             messages, overrides, auth_claims, should_stream=True
         )
-        chat_coroutine = cast(Awaitable[AsyncStream[ChatCompletionChunk]], chat_coroutine)
-        yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}
+        chat_coroutine = cast(
+            Awaitable[AsyncStream[ChatCompletionChunk]], chat_coroutine
+        )
+        yield {
+            "delta": {"role": "assistant"},
+            "context": extra_info,
+            "session_state": session_state,
+        }
 
         followup_questions_started = False
         followup_content = ""
@@ -104,7 +118,9 @@ async def run_with_streaming(
                 }
                 # if event contains << and not >>, it is start of follow-up question, truncate
                 content = completion["delta"].get("content")
-                content = content or ""  # content may either not exist in delta, or explicitly be None
+                content = (
+                    content or ""
+                )  # content may either not exist in delta, or explicitly be None
                 if overrides.get("suggest_followup_questions") and "<<" in content:
                     followup_questions_started = True
                     earlier_content = content[: content.index("<<")]
@@ -119,15 +135,26 @@ async def run_with_streaming(
             else:
                 # Final chunk at end of streaming should contain usage
                 # https://cookbook.openai.com/examples/how_to_stream_completions#4-how-to-get-token-usage-data-for-streamed-chat-completion-response
-                if event_chunk.usage and extra_info.thoughts and self.include_token_usage:
+                if (
+                    event_chunk.usage
+                    and extra_info.thoughts
+                    and self.include_token_usage
+                ):
                     extra_info.thoughts[-1].update_token_usage(event_chunk.usage)
-                    yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}
+                    yield {
+                        "delta": {"role": "assistant"},
+                        "context": extra_info,
+                        "session_state": session_state,
+                    }
 
         if followup_content:
             _, followup_questions = self.extract_followup_questions(followup_content)
             yield {
                 "delta": {"role": "assistant"},
-                "context": {"context": extra_info, "followup_questions": followup_questions},
+                "context": {
+                    "context": extra_info,
+                    "followup_questions": followup_questions,
+                },
             }
 
     async def run(
@@ -138,7 +165,9 @@ async def run(
     ) -> dict[str, Any]:
         overrides = context.get("overrides", {})
         auth_claims = context.get("auth_claims", {})
-        return await self.run_without_streaming(messages, overrides, auth_claims, session_state)
+        return await self.run_without_streaming(
+            messages, overrides, auth_claims, session_state
+        )
 
     async def run_stream(
         self,