diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml
index 5dbea0c..040b097 100644
--- a/.github/workflows/python-check.yaml
+++ b/.github/workflows/python-check.yaml
@@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- os: ["ubuntu-20.04", "windows-latest"]
+ os: ["ubuntu-latest", "windows-latest"]
python_version: ["3.11"]
steps:
- uses: actions/checkout@v4
diff --git a/infra/main.bicep b/infra/main.bicep
index cdf701e..24c3b44 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -123,7 +123,7 @@ module aca 'aca.bicep' = {
containerAppsEnvironmentName: containerApps.outputs.environmentName
containerRegistryName: containerApps.outputs.registryName
aiServicesDeploymentName: aiServicesDeploymentName
- aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/models'
+ aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com'
exists: acaExists
}
}
@@ -167,7 +167,7 @@ output AZURE_LOCATION string = location
output AZURE_TENANT_ID string = tenant().tenantId
output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName
-output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/models'
+output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com'
output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId
output SERVICE_ACA_NAME string = aca.outputs.name
diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py
index 9d9de44..5643d43 100644
--- a/src/quartapp/chat.py
+++ b/src/quartapp/chat.py
@@ -1,9 +1,8 @@
import json
import os
-import httpx
from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider
-from openai import AsyncOpenAI, DefaultAsyncHttpxClient
+from openai import AsyncAzureOpenAI
from quart import (
Blueprint,
Response,
@@ -32,21 +31,11 @@ async def configure_openai():
bp.azure_credential, "https://cognitiveservices.azure.com/.default"
)
- class TokenBasedAuth(httpx.Auth):
- async def async_auth_flow(self, request):
- token = await openai_token_provider()
- request.headers["Authorization"] = f"Bearer {token}"
- yield request
-
- def sync_auth_flow(self, request):
- raise RuntimeError("Cannot use a sync authentication class with httpx.AsyncClient")
-
# Create the Asynchronous Azure OpenAI client
- bp.openai_client = AsyncOpenAI(
- base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
- api_key="placeholder",
- default_query={"api-version": "2024-05-01-preview"},
- http_client=DefaultAsyncHttpxClient(auth=TokenBasedAuth()),
+ bp.openai_client = AsyncAzureOpenAI(
+ azure_endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
+ azure_ad_token_provider=openai_token_provider,
+ api_version="2025-04-01-preview", # temporary
)
# Set the model name to the Azure OpenAI model deployment name
@@ -82,29 +71,9 @@ async def response_stream():
)
try:
- is_thinking = False
async for update in await chat_coroutine:
if update.choices:
- content = update.choices[0].delta.content
- if content == "":
- is_thinking = True
- update.choices[0].delta.content = None
- update.choices[0].delta.reasoning_content = ""
- elif content == "":
- is_thinking = False
- update.choices[0].delta.content = None
- update.choices[0].delta.reasoning_content = ""
- elif content:
- if is_thinking:
- yield json.dumps(
- {"delta": {"content": None, "reasoning_content": content, "role": "assistant"}},
- ensure_ascii=False,
- ) + "\n"
- else:
- yield json.dumps(
- {"delta": {"content": content, "reasoning_content": None, "role": "assistant"}},
- ensure_ascii=False,
- ) + "\n"
+ yield update.choices[0].model_dump_json() + "\n"
except Exception as e:
current_app.logger.error(e)
yield json.dumps({"error": str(e)}, ensure_ascii=False) + "\n"
diff --git a/src/quartapp/templates/index.html b/src/quartapp/templates/index.html
index 7430215..527d3e8 100644
--- a/src/quartapp/templates/index.html
+++ b/src/quartapp/templates/index.html
@@ -120,7 +120,7 @@
messageDiv.querySelector(".thoughts").style.display = "block";
messageDiv.querySelector(".thoughts-content").innerHTML = converter.makeHtml(thoughts);
}
- } else {
+ } else if (event.delta.content) {
messageDiv.querySelector(".loading-bar").style.display = "none";
answer += event.delta.content;
messageDiv.querySelector(".answer-content").innerHTML = converter.makeHtml(answer);
diff --git a/src/requirements.txt b/src/requirements.txt
index 7bf7067..b038813 100644
--- a/src/requirements.txt
+++ b/src/requirements.txt
@@ -38,7 +38,7 @@ cffi==1.17.1
# via cryptography
charset-normalizer==3.4.0
# via requests
-click==8.1.7
+click==8.2.1
# via
# flask
# quart
@@ -58,7 +58,7 @@ frozenlist==1.4.1
# aiosignal
gunicorn==23.0.0
# via quartapp (pyproject.toml)
-h11==0.14.0
+h11==0.16.0
# via
# httpcore
# hypercorn
@@ -68,7 +68,7 @@ h2==4.1.0
# via hypercorn
hpack==4.0.0
# via h2
-httpcore==1.0.7
+httpcore==1.0.9
# via httpx
httptools==0.6.4
# via quartapp (pyproject.toml)
@@ -156,7 +156,7 @@ typing-extensions==4.12.2
# pydantic-core
urllib3==2.2.3
# via requests
-uvicorn==0.32.0
+uvicorn==0.34.2
# via quartapp (pyproject.toml)
uvloop==0.20.0 ; sys_platform != "win32" and (sys_platform != "cygwin" and platform_python_implementation != "PyPy")
# via quartapp (pyproject.toml)
diff --git a/tests/conftest.py b/tests/conftest.py
index c5cbac0..ba516b2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
@pytest.fixture
def mock_openai_chatcompletion(monkeypatch):
class AsyncChatCompletionIterator:
- def __init__(self, answer: str):
+ def __init__(self, reasoning: str, answer: str):
self.chunk_index = 0
self.chunks = [
openai.types.chat.ChatCompletionChunk(
@@ -32,10 +32,41 @@ def __init__(self, answer: str):
],
)
]
+ reasoning_deltas = reasoning.split(" ")
+ for reasoning_index, reasoning_delta in enumerate(reasoning_deltas):
+     # Streamed chunks carry their leading whitespace, so re-add the space that split(" ") removed
+     if reasoning_index > 0:
+         reasoning_delta = " " + reasoning_delta
+     self.chunks.append(
+         openai.types.chat.ChatCompletionChunk(
+             id="test-123",
+             object="chat.completion.chunk",
+             choices=[
+                 openai.types.chat.chat_completion_chunk.Choice(
+                     delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(
+                         role=None, reasoning_content=reasoning_delta
+                     ),
+                     finish_reason=None,
+                     index=0,
+                     logprobs=None,
+                     # Only Azure includes content_filter_results
+                     content_filter_results={
+                         "hate": {"filtered": False, "severity": "safe"},
+                         "self_harm": {"filtered": False, "severity": "safe"},
+                         "sexual": {"filtered": False, "severity": "safe"},
+                         "violence": {"filtered": False, "severity": "safe"},
+                     },
+                 )
+             ],
+             created=1703462735,
+             model="DeepSeek-R1",
+         )
+     )
+
answer_deltas = answer.split(" ")
for answer_index, answer_delta in enumerate(answer_deltas):
# Text completion chunks include whitespace, so we need to add it back in
- if answer_index > 0 and answer_delta != "":
+ if answer_index > 0:
answer_delta = " " + answer_delta
self.chunks.append(
openai.types.chat.ChatCompletionChunk(
@@ -95,9 +126,9 @@ async def mock_acreate(*args, **kwargs):
# Only mock a stream=True completion
last_message = kwargs.get("messages")[-1]["content"]
if last_message == "What is the capital of France?":
- return AsyncChatCompletionIterator(" hmm The capital of France is Paris.")
+ return AsyncChatCompletionIterator("hmm", "The capital of France is Paris.")
elif last_message == "What is the capital of Germany?":
- return AsyncChatCompletionIterator(" hmm The capital of Germany is Berlin.")
+ return AsyncChatCompletionIterator("hmm", "The capital of Germany is Berlin.")
else:
raise ValueError(f"Unexpected message: {last_message}")
diff --git a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines
index 50730aa..f81bde8 100644
--- a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines
@@ -1,7 +1,8 @@
-{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}}
-{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " France", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " Paris.", "reasoning_content": null, "role": "assistant"}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" France","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" Paris.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}
diff --git a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines
index 42d9a6c..2d41c6e 100644
--- a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines
+++ b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines
@@ -1,7 +1,8 @@
-{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}}
-{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " Germany", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}}
-{"delta": {"content": " Berlin.", "reasoning_content": null, "role": "assistant"}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" Germany","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":" Berlin.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
+{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}