Merge pull request #15 from Azure-Samples/thinkchange

pamelafox · web-flow · commit d6ade4d5239b · 2025-06-02T11:44:54.000-07:00
Changes for new thinking format and URL format
diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml
@@ -13,7 +13,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ["ubuntu-20.04", "windows-latest"]
+        os: ["ubuntu-latest", "windows-latest"]
         python_version: ["3.11"]
     steps:
         - uses: actions/checkout@v4
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -123,7 +123,7 @@ module aca 'aca.bicep' = {
     containerAppsEnvironmentName: containerApps.outputs.environmentName
     containerRegistryName: containerApps.outputs.registryName
     aiServicesDeploymentName: aiServicesDeploymentName
-    aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/models'
+    aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com'
     exists: acaExists
   }
 }
@@ -167,7 +167,7 @@ output AZURE_LOCATION string = location
 output AZURE_TENANT_ID string = tenant().tenantId
 
 output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName
-output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/models'
+output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com'
 
 output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId
 output SERVICE_ACA_NAME string = aca.outputs.name
diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py
@@ -1,9 +1,8 @@
 import json
 import os
 
-import httpx
 from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider
-from openai import AsyncOpenAI, DefaultAsyncHttpxClient
+from openai import AsyncAzureOpenAI
 from quart import (
     Blueprint,
     Response,
@@ -32,21 +31,11 @@ async def configure_openai():
         bp.azure_credential, "https://cognitiveservices.azure.com/.default"
     )
 
-    class TokenBasedAuth(httpx.Auth):
-        async def async_auth_flow(self, request):
-            token = await openai_token_provider()
-            request.headers["Authorization"] = f"Bearer {token}"
-            yield request
-
-        def sync_auth_flow(self, request):
-            raise RuntimeError("Cannot use a sync authentication class with httpx.AsyncClient")
-
     # Create the Asynchronous Azure OpenAI client
-    bp.openai_client = AsyncOpenAI(
-        base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
-        api_key="placeholder",
-        default_query={"api-version": "2024-05-01-preview"},
-        http_client=DefaultAsyncHttpxClient(auth=TokenBasedAuth()),
+    bp.openai_client = AsyncAzureOpenAI(
+        azure_endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
+        azure_ad_token_provider=openai_token_provider,
+        api_version="2025-04-01-preview",  # temporary
     )
 
     # Set the model name to the Azure OpenAI model deployment name
@@ -82,29 +71,9 @@ async def response_stream():
         )
 
         try:
-            is_thinking = False
             async for update in await chat_coroutine:
                 if update.choices:
-                    content = update.choices[0].delta.content
-                    if content == "<think>":
-                        is_thinking = True
-                        update.choices[0].delta.content = None
-                        update.choices[0].delta.reasoning_content = ""
-                    elif content == "</think>":
-                        is_thinking = False
-                        update.choices[0].delta.content = None
-                        update.choices[0].delta.reasoning_content = ""
-                    elif content:
-                        if is_thinking:
-                            yield json.dumps(
-                                {"delta": {"content": None, "reasoning_content": content, "role": "assistant"}},
-                                ensure_ascii=False,
-                            ) + "\n"
-                        else:
-                            yield json.dumps(
-                                {"delta": {"content": content, "reasoning_content": None, "role": "assistant"}},
-                                ensure_ascii=False,
-                            ) + "\n"
+                    yield update.choices[0].model_dump_json() + "\n"
         except Exception as e:
             current_app.logger.error(e)
             yield json.dumps({"error": str(e)}, ensure_ascii=False) + "\n"
diff --git a/src/quartapp/templates/index.html b/src/quartapp/templates/index.html
@@ -120,7 +120,7 @@
                         messageDiv.querySelector(".thoughts").style.display = "block";
                         messageDiv.querySelector(".thoughts-content").innerHTML = converter.makeHtml(thoughts);
                     }
-                } else {
+                } else if (event.delta.content) {
                     messageDiv.querySelector(".loading-bar").style.display = "none";
                     answer += event.delta.content;
                     messageDiv.querySelector(".answer-content").innerHTML = converter.makeHtml(answer);
diff --git a/src/requirements.txt b/src/requirements.txt
@@ -38,7 +38,7 @@ cffi==1.17.1
     # via cryptography
 charset-normalizer==3.4.0
     # via requests
-click==8.1.7
+click==8.2.1
     # via
     #   flask
     #   quart
@@ -58,7 +58,7 @@ frozenlist==1.4.1
     #   aiosignal
 gunicorn==23.0.0
     # via quartapp (pyproject.toml)
-h11==0.14.0
+h11==0.16.0
     # via
     #   httpcore
     #   hypercorn
@@ -68,7 +68,7 @@ h2==4.1.0
     # via hypercorn
 hpack==4.0.0
     # via h2
-httpcore==1.0.7
+httpcore==1.0.9
     # via httpx
 httptools==0.6.4
     # via quartapp (pyproject.toml)
@@ -156,7 +156,7 @@ typing-extensions==4.12.2
     #   pydantic-core
 urllib3==2.2.3
     # via requests
-uvicorn==0.32.0
+uvicorn==0.34.2
     # via quartapp (pyproject.toml)
 uvloop==0.20.0 ; sys_platform != "win32" and (sys_platform != "cygwin" and platform_python_implementation != "PyPy")
     # via quartapp (pyproject.toml)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -10,7 +10,7 @@
 @pytest.fixture
 def mock_openai_chatcompletion(monkeypatch):
     class AsyncChatCompletionIterator:
-        def __init__(self, answer: str):
+        def __init__(self, reasoning: str, answer: str):
             self.chunk_index = 0
             self.chunks = [
                 openai.types.chat.ChatCompletionChunk(
@@ -32,10 +32,41 @@ def __init__(self, answer: str):
                     ],
                 )
             ]
+            reasoning_deltas = reasoning.split(" ")
+            for reasoning_index, reasoning_delta in enumerate(reasoning_deltas):
+                # Streamed deltas carry their own leading whitespace, so restore the space removed by split(" ")
+                if reasoning_index > 0:
+                    reasoning_delta = " " + reasoning_delta
+                self.chunks.append(
+                    openai.types.chat.ChatCompletionChunk(
+                        id="test-123",
+                        object="chat.completion.chunk",
+                        choices=[
+                            openai.types.chat.chat_completion_chunk.Choice(
+                                delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(
+                                    role=None, reasoning_content=reasoning_delta
+                                ),
+                                finish_reason=None,
+                                index=0,
+                                logprobs=None,
+                                # Only Azure includes content_filter_results
+                                content_filter_results={
+                                    "hate": {"filtered": False, "severity": "safe"},
+                                    "self_harm": {"filtered": False, "severity": "safe"},
+                                    "sexual": {"filtered": False, "severity": "safe"},
+                                    "violence": {"filtered": False, "severity": "safe"},
+                                },
+                            )
+                        ],
+                        created=1703462735,
+                        model="DeepSeek-R1",
+                    )
+                )
+
             answer_deltas = answer.split(" ")
             for answer_index, answer_delta in enumerate(answer_deltas):
                 # Text completion chunks include whitespace, so we need to add it back in
-                if answer_index > 0 and answer_delta != "</think>":
+                if answer_index > 0:
                     answer_delta = " " + answer_delta
                 self.chunks.append(
                     openai.types.chat.ChatCompletionChunk(
@@ -95,9 +126,9 @@ async def mock_acreate(*args, **kwargs):
         # Only mock a stream=True completion
         last_message = kwargs.get("messages")[-1]["content"]
         if last_message == "What is the capital of France?":
-            return AsyncChatCompletionIterator("<think> hmm </think> The capital of France is Paris.")
+            return AsyncChatCompletionIterator("hmm", "The capital of France is Paris.")
         elif last_message == "What is the capital of Germany?":
-            return AsyncChatCompletionIterator("<think> hmm </think> The capital of Germany is Berlin.")
+            return AsyncChatCompletionIterator("hmm", "The capital of Germany is Berlin.")
         else:
             raise ValueError(f"Unexpected message: {last_message}")
 
diff --git a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines
@@ -1,7 +1,8 @@
-{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}}
-{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " France", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " Paris.", "reasoning_content": null, "role": "assistant"}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" France","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" Paris.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}
diff --git a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines
@@ -1,7 +1,8 @@
-{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}}
-{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " Germany", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " Berlin.", "reasoning_content": null, "role": "assistant"}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" Germany","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" Berlin.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}

Original file line number	Diff line number	Diff line change
`@@ -120,7 +120,7 @@`
`120`	`120`	`messageDiv.querySelector(".thoughts").style.display = "block";`
`121`	`121`	`messageDiv.querySelector(".thoughts-content").innerHTML = converter.makeHtml(thoughts);`
`122`	`122`	`}`
`123`		`- } else {`
	`123`	`+ } else if (event.delta.content) {`
`124`	`124`	`messageDiv.querySelector(".loading-bar").style.display = "none";`
`125`	`125`	`answer += event.delta.content;`
`126`	`126`	`messageDiv.querySelector(".answer-content").innerHTML = converter.makeHtml(answer);`