Azure-Samples
diff --git a/app/backend/app.py b/app/backend/app.py
Lines changed: 29 additions & 7 deletions
diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
Lines changed: 9 additions & 3 deletions
diff --git a/app/backend/approaches/chatapproach.py b/app/backend/approaches/chatapproach.py
Lines changed: 25 additions & 39 deletions
diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py
Lines changed: 7 additions & 6 deletions
diff --git a/app/backend/approaches/retrievethenreadvision.py b/app/backend/approaches/retrievethenreadvision.py
Lines changed: 8 additions & 6 deletions
diff --git a/app/frontend/src/api/api.ts b/app/frontend/src/api/api.ts
Lines changed: 6 additions & 2 deletions
diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts
Lines changed: 6 additions & 8 deletions
diff --git a/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx b/app/frontend/src/components/AnalysisPanel/AnalysisPanel.tsx
Lines changed: 4 additions & 4 deletions
diff --git a/app/frontend/src/components/Answer/Answer.tsx b/app/frontend/src/components/Answer/Answer.tsx
Lines changed: 4 additions & 4 deletions
diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx
Lines changed: 2 additions & 2 deletions
@@ -215,17 +215,39 @@ async def chat(auth_claims: Dict[str, Any]):
 
         result = await approach.run(
             request_json["messages"],
-            stream=request_json.get("stream", False),
             context=context,
             session_state=request_json.get("session_state"),
         )
-        if isinstance(result, dict):
-            return jsonify(result)
+        return jsonify(result)
+    except Exception as error:
+        return error_response(error, "/chat")
+
+
+@bp.route("/chat/stream", methods=["POST"])
+@authenticated
+async def chat_stream(auth_claims: Dict[str, Any]):
+    if not request.is_json:
+        return jsonify({"error": "request must be json"}), 415
+    request_json = await request.get_json()
+    context = request_json.get("context", {})
+    context["auth_claims"] = auth_claims
+    try:
+        use_gpt4v = context.get("overrides", {}).get("use_gpt4v", False)
+        approach: Approach
+        if use_gpt4v and CONFIG_CHAT_VISION_APPROACH in current_app.config:
+            approach = cast(Approach, current_app.config[CONFIG_CHAT_VISION_APPROACH])
         else:
-            response = await make_response(format_as_ndjson(result))
-            response.timeout = None  # type: ignore
-            response.mimetype = "application/json-lines"
-            return response
+            approach = cast(Approach, current_app.config[CONFIG_CHAT_APPROACH])
+
+        result = await approach.run_stream(
+            request_json["messages"],
+            context=context,
+            session_state=request_json.get("session_state"),
+        )
+        response = await make_response(format_as_ndjson(result))
+        response.timeout = None  # type: ignore
+        response.mimetype = "application/json-lines"
+        return response
     except Exception as error:
         return error_response(error, "/chat")
 
 
@@ -9,7 +9,6 @@
     List,
     Optional,
     TypedDict,
-    Union,
     cast,
 )
 from urllib.parse import urljoin
@@ -257,8 +256,15 @@ async def compute_image_embedding(self, q: str):
     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
-        stream: bool = False,
         session_state: Any = None,
         context: dict[str, Any] = {},
-    ) -> Union[dict[str, Any], AsyncGenerator[dict[str, Any], None]]:
+    ) -> dict[str, Any]:
+        raise NotImplementedError
+
+    async def run_stream(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        session_state: Any = None,
+        context: dict[str, Any] = {},
+    ) -> AsyncGenerator[dict[str, Any], None]:
         raise NotImplementedError
@@ -1,7 +1,7 @@
 import json
 import re
 from abc import ABC, abstractmethod
-from typing import Any, AsyncGenerator, Optional, Union
+from typing import Any, AsyncGenerator, Optional
 
 from openai.types.chat import ChatCompletion, ChatCompletionMessageParam
 
@@ -90,12 +90,13 @@ async def run_without_streaming(
         )
         chat_completion_response: ChatCompletion = await chat_coroutine
         chat_resp = chat_completion_response.model_dump()  # Convert to dict to make it JSON serializable
-        chat_resp["choices"][0]["context"] = extra_info
+        chat_resp = chat_resp["choices"][0]
+        chat_resp["context"] = extra_info
         if overrides.get("suggest_followup_questions"):
-            content, followup_questions = self.extract_followup_questions(chat_resp["choices"][0]["message"]["content"])
-            chat_resp["choices"][0]["message"]["content"] = content
-            chat_resp["choices"][0]["context"]["followup_questions"] = followup_questions
-        chat_resp["choices"][0]["session_state"] = session_state
+            content, followup_questions = self.extract_followup_questions(chat_resp["message"]["content"])
+            chat_resp["message"]["content"] = content
+            chat_resp["context"]["followup_questions"] = followup_questions
+        chat_resp["session_state"] = session_state
         return chat_resp
 
     async def run_with_streaming(
@@ -108,64 +109,49 @@ async def run_with_streaming(
         extra_info, chat_coroutine = await self.run_until_final_call(
             messages, overrides, auth_claims, should_stream=True
         )
-        yield {
-            "choices": [
-                {
-                    "delta": {"role": "assistant"},
-                    "context": extra_info,
-                    "session_state": session_state,
-                    "finish_reason": None,
-                    "index": 0,
-                }
-            ],
-            "object": "chat.completion.chunk",
-        }
+        yield {"delta": {"role": "assistant"}, "context": extra_info, "session_state": session_state}
 
         followup_questions_started = False
         followup_content = ""
         async for event_chunk in await chat_coroutine:
             # "2023-07-01-preview" API version has a bug where first response has empty choices
             event = event_chunk.model_dump()  # Convert pydantic model to dict
             if event["choices"]:
+                completion = {"delta": event["choices"][0]["delta"]}
                 # if event contains << and not >>, it is start of follow-up question, truncate
-                content = event["choices"][0]["delta"].get("content")
+                content = completion["delta"].get("content")
                 content = content or ""  # content may either not exist in delta, or explicitly be None
                 if overrides.get("suggest_followup_questions") and "<<" in content:
                     followup_questions_started = True
                     earlier_content = content[: content.index("<<")]
                     if earlier_content:
-                        event["choices"][0]["delta"]["content"] = earlier_content
-                        yield event
+                        completion["delta"]["content"] = earlier_content
+                        yield completion
                     followup_content += content[content.index("<<") :]
                 elif followup_questions_started:
                     followup_content += content
                 else:
-                    yield event
+                    yield completion
         if followup_content:
             _, followup_questions = self.extract_followup_questions(followup_content)
-            yield {
-                "choices": [
-                    {
-                        "delta": {"role": "assistant"},
-                        "context": {"followup_questions": followup_questions},
-                        "finish_reason": None,
-                        "index": 0,
-                    }
-                ],
-                "object": "chat.completion.chunk",
-            }
+            yield {"delta": {"role": "assistant"}, "context": {"followup_questions": followup_questions}}
 
     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
-        stream: bool = False,
         session_state: Any = None,
         context: dict[str, Any] = {},
-    ) -> Union[dict[str, Any], AsyncGenerator[dict[str, Any], None]]:
+    ) -> dict[str, Any]:
         overrides = context.get("overrides", {})
         auth_claims = context.get("auth_claims", {})
+        return await self.run_without_streaming(messages, overrides, auth_claims, session_state)
 
-        if stream is False:
-            return await self.run_without_streaming(messages, overrides, auth_claims, session_state)
-        else:
-            return self.run_with_streaming(messages, overrides, auth_claims, session_state)
+    async def run_stream(
+        self,
+        messages: list[ChatCompletionMessageParam],
+        session_state: Any = None,
+        context: dict[str, Any] = {},
+    ) -> AsyncGenerator[dict[str, Any], None]:
+        overrides = context.get("overrides", {})
+        auth_claims = context.get("auth_claims", {})
+        return self.run_with_streaming(messages, overrides, auth_claims, session_state)
@@ -1,4 +1,4 @@
-from typing import Any, AsyncGenerator, Optional, Union
+from typing import Any, Optional
 
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import VectorQuery
@@ -72,10 +72,9 @@ def __init__(
     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
-        stream: bool = False,  # Stream is not used in this approach
         session_state: Any = None,
         context: dict[str, Any] = {},
-    ) -> Union[dict[str, Any], AsyncGenerator[dict[str, Any], None]]:
+    ) -> dict[str, Any]:
         q = messages[-1]["content"]
         if not isinstance(q, str):
             raise ValueError("The most recent message content must be a string.")
@@ -167,6 +166,8 @@ async def run(
             ],
         }
 
-        chat_completion["choices"][0]["context"] = extra_info
-        chat_completion["choices"][0]["session_state"] = session_state
-        return chat_completion
+        completion = {}
+        completion["message"] = chat_completion["choices"][0]["message"]
+        completion["context"] = extra_info
+        completion["session_state"] = session_state
+        return completion
@@ -1,4 +1,4 @@
-from typing import Any, AsyncGenerator, Awaitable, Callable, Optional, Union
+from typing import Any, Awaitable, Callable, Optional
 
 from azure.search.documents.aio import SearchClient
 from azure.storage.blob.aio import ContainerClient
@@ -72,10 +72,9 @@ def __init__(
     async def run(
         self,
         messages: list[ChatCompletionMessageParam],
-        stream: bool = False,  # Stream is not used in this approach
         session_state: Any = None,
         context: dict[str, Any] = {},
-    ) -> Union[dict[str, Any], AsyncGenerator[dict[str, Any], None]]:
+    ) -> dict[str, Any]:
         q = messages[-1]["content"]
         if not isinstance(q, str):
             raise ValueError("The most recent message content must be a string.")
@@ -189,6 +188,9 @@ async def run(
                 ),
             ],
         }
-        chat_completion["choices"][0]["context"] = extra_info
-        chat_completion["choices"][0]["session_state"] = session_state
-        return chat_completion
+
+        completion = {}
+        completion["message"] = chat_completion["choices"][0]["message"]
+        completion["context"] = extra_info
+        completion["session_state"] = session_state
+        return completion
@@ -37,8 +37,12 @@ export async function askApi(request: ChatAppRequest, idToken: string | undefine
     return parsedResponse as ChatAppResponse;
 }
 
-export async function chatApi(request: ChatAppRequest, idToken: string | undefined): Promise<Response> {
-    return await fetch(`${BACKEND_URI}/chat`, {
+export async function chatApi(request: ChatAppRequest, shouldStream: boolean, idToken: string | undefined): Promise<Response> {
+    let url = `${BACKEND_URI}/chat`;
+    if (shouldStream) {
+        url += "/stream";
+    }
+    return await fetch(url, {
         method: "POST",
         headers: { ...getHeaders(idToken), "Content-Type": "application/json" },
         body: JSON.stringify(request)
 
@@ -53,20 +53,19 @@ export type ResponseContext = {
     thoughts: Thoughts[];
 };
 
-export type ResponseChoice = {
-    index: number;
+export type ChatAppResponseOrError = {
     message: ResponseMessage;
+    delta: ResponseMessage;
     context: ResponseContext;
     session_state: any;
-};
-
-export type ChatAppResponseOrError = {
-    choices?: ResponseChoice[];
     error?: string;
 };
 
 export type ChatAppResponse = {
-    choices: ResponseChoice[];
+    message: ResponseMessage;
+    delta: ResponseMessage;
+    context: ResponseContext;
+    session_state: any;
 };
 
 export type ChatAppRequestContext = {
@@ -76,7 +75,6 @@ export type ChatAppRequestContext = {
 export type ChatAppRequest = {
     messages: ResponseMessage[];
     context?: ChatAppRequestContext;
-    stream?: boolean;
     session_state: any;
 };
 
 
@@ -24,8 +24,8 @@ interface Props {
 const pivotItemDisabledStyle = { disabled: true, style: { color: "grey" } };
 
 export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeight, className, onActiveTabChanged }: Props) => {
-    const isDisabledThoughtProcessTab: boolean = !answer.choices[0].context.thoughts;
-    const isDisabledSupportingContentTab: boolean = !answer.choices[0].context.data_points;
+    const isDisabledThoughtProcessTab: boolean = !answer.context.thoughts;
+    const isDisabledSupportingContentTab: boolean = !answer.context.data_points;
     const isDisabledCitationTab: boolean = !activeCitation;
     const [citation, setCitation] = useState("");
 
@@ -81,14 +81,14 @@ export const AnalysisPanel = ({ answer, activeTab, activeCitation, citationHeigh
                 headerText="Thought process"
                 headerButtonProps={isDisabledThoughtProcessTab ? pivotItemDisabledStyle : undefined}
             >
-                <ThoughtProcess thoughts={answer.choices[0].context.thoughts || []} />
+                <ThoughtProcess thoughts={answer.context.thoughts || []} />
             </PivotItem>
             <PivotItem
                 itemKey={AnalysisPanelTabs.SupportingContentTab}
                 headerText="Supporting content"
                 headerButtonProps={isDisabledSupportingContentTab ? pivotItemDisabledStyle : undefined}
             >
-                <SupportingContent supportingContent={answer.choices[0].context.data_points} />
+                <SupportingContent supportingContent={answer.context.data_points} />
             </PivotItem>
             <PivotItem
                 itemKey={AnalysisPanelTabs.CitationTab}
 
@@ -36,8 +36,8 @@ export const Answer = ({
     showSpeechOutputBrowser,
     speechUrl
 }: Props) => {
-    const followupQuestions = answer.choices[0].context.followup_questions;
-    const messageContent = answer.choices[0].message.content;
+    const followupQuestions = answer.context?.followup_questions;
+    const messageContent = answer.message.content;
     const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]);
 
     const sanitizedAnswerHtml = DOMPurify.sanitize(parsedAnswer.answerHtml);
@@ -54,15 +54,15 @@ export const Answer = ({
                             title="Show thought process"
                             ariaLabel="Show thought process"
                             onClick={() => onThoughtProcessClicked()}
-                            disabled={!answer.choices[0].context.thoughts?.length}
+                            disabled={!answer.context.thoughts?.length}
                         />
                         <IconButton
                             style={{ color: "black" }}
                             iconProps={{ iconName: "ClipboardList" }}
                             title="Show supporting content"
                             ariaLabel="Show supporting content"
                             onClick={() => onSupportingContentClicked()}
-                            disabled={!answer.choices[0].context.data_points}
+                            disabled={!answer.context.data_points}
                         />
                         {showSpeechOutputAzure && <SpeechOutputAzure url={speechUrl} />}
                         {showSpeechOutputBrowser && <SpeechOutputBrowser answer={sanitizedAnswerHtml} />}
 
@@ -81,7 +81,7 @@ export function Component(): JSX.Element {
 
     useEffect(() => {
         if (answer && showSpeechOutputAzure) {
-            getSpeechApi(answer.choices[0].message.content).then(speechUrl => {
+            getSpeechApi(answer.message.content).then(speechUrl => {
                 setSpeechUrl(speechUrl);
             });
         }
@@ -126,7 +126,7 @@ export function Component(): JSX.Element {
                     }
                 },
                 // ChatAppProtocol: Client must pass on any session state received from the server
-                session_state: answer ? answer.choices[0].session_state : null
+                session_state: answer ? answer.session_state : null
             };
             const result = await askApi(request, token);
             setAnswer(result);