Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ["ubuntu-20.04", "windows-latest"]
os: ["ubuntu-latest", "windows-latest"]
python_version: ["3.11"]
steps:
- uses: actions/checkout@v4
Expand Down
4 changes: 2 additions & 2 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ module aca 'aca.bicep' = {
containerAppsEnvironmentName: containerApps.outputs.environmentName
containerRegistryName: containerApps.outputs.registryName
aiServicesDeploymentName: aiServicesDeploymentName
aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/models'
aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com'
exists: acaExists
}
}
Expand Down Expand Up @@ -167,7 +167,7 @@ output AZURE_LOCATION string = location
output AZURE_TENANT_ID string = tenant().tenantId

output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName
output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/models'
output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com'

output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId
output SERVICE_ACA_NAME string = aca.outputs.name
Expand Down
43 changes: 6 additions & 37 deletions src/quartapp/chat.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import json
import os

import httpx
from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider
from openai import AsyncOpenAI, DefaultAsyncHttpxClient
from openai import AsyncAzureOpenAI
from quart import (
Blueprint,
Response,
Expand Down Expand Up @@ -32,21 +31,11 @@ async def configure_openai():
bp.azure_credential, "https://cognitiveservices.azure.com/.default"
)

class TokenBasedAuth(httpx.Auth):
async def async_auth_flow(self, request):
token = await openai_token_provider()
request.headers["Authorization"] = f"Bearer {token}"
yield request

def sync_auth_flow(self, request):
raise RuntimeError("Cannot use a sync authentication class with httpx.AsyncClient")

# Create the Asynchronous Azure OpenAI client
bp.openai_client = AsyncOpenAI(
base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
api_key="placeholder",
default_query={"api-version": "2024-05-01-preview"},
http_client=DefaultAsyncHttpxClient(auth=TokenBasedAuth()),
bp.openai_client = AsyncAzureOpenAI(
azure_endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
azure_ad_token_provider=openai_token_provider,
api_version="2025-04-01-preview", # temporary
)

# Set the model name to the Azure OpenAI model deployment name
Expand Down Expand Up @@ -82,29 +71,9 @@ async def response_stream():
)

try:
is_thinking = False
async for update in await chat_coroutine:
if update.choices:
content = update.choices[0].delta.content
if content == "<think>":
is_thinking = True
update.choices[0].delta.content = None
update.choices[0].delta.reasoning_content = ""
elif content == "</think>":
is_thinking = False
update.choices[0].delta.content = None
update.choices[0].delta.reasoning_content = ""
elif content:
if is_thinking:
yield json.dumps(
{"delta": {"content": None, "reasoning_content": content, "role": "assistant"}},
ensure_ascii=False,
) + "\n"
else:
yield json.dumps(
{"delta": {"content": content, "reasoning_content": None, "role": "assistant"}},
ensure_ascii=False,
) + "\n"
yield update.choices[0].model_dump_json() + "\n"
except Exception as e:
current_app.logger.error(e)
yield json.dumps({"error": str(e)}, ensure_ascii=False) + "\n"
Expand Down
2 changes: 1 addition & 1 deletion src/quartapp/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@
messageDiv.querySelector(".thoughts").style.display = "block";
messageDiv.querySelector(".thoughts-content").innerHTML = converter.makeHtml(thoughts);
}
} else {
} else if (event.delta.content) {
messageDiv.querySelector(".loading-bar").style.display = "none";
answer += event.delta.content;
messageDiv.querySelector(".answer-content").innerHTML = converter.makeHtml(answer);
Expand Down
8 changes: 4 additions & 4 deletions src/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ cffi==1.17.1
# via cryptography
charset-normalizer==3.4.0
# via requests
click==8.1.7
click==8.2.1
# via
# flask
# quart
Expand All @@ -58,7 +58,7 @@ frozenlist==1.4.1
# aiosignal
gunicorn==23.0.0
# via quartapp (pyproject.toml)
h11==0.14.0
h11==0.16.0
# via
# httpcore
# hypercorn
Expand All @@ -68,7 +68,7 @@ h2==4.1.0
# via hypercorn
hpack==4.0.0
# via h2
httpcore==1.0.7
httpcore==1.0.9
# via httpx
httptools==0.6.4
# via quartapp (pyproject.toml)
Expand Down Expand Up @@ -156,7 +156,7 @@ typing-extensions==4.12.2
# pydantic-core
urllib3==2.2.3
# via requests
uvicorn==0.32.0
uvicorn==0.34.2
# via quartapp (pyproject.toml)
uvloop==0.20.0 ; sys_platform != "win32" and (sys_platform != "cygwin" and platform_python_implementation != "PyPy")
# via quartapp (pyproject.toml)
Expand Down
39 changes: 35 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
@pytest.fixture
def mock_openai_chatcompletion(monkeypatch):
class AsyncChatCompletionIterator:
def __init__(self, answer: str):
def __init__(self, reasoning: str, answer: str):
self.chunk_index = 0
self.chunks = [
openai.types.chat.ChatCompletionChunk(
Expand All @@ -32,10 +32,41 @@ def __init__(self, answer: str):
],
)
]
# Emit the mocked reasoning text as one streamed chunk per word, ahead of the
# answer chunks. Each chunk sets only ``reasoning_content`` on its delta.
reasoning_deltas = reasoning.split(" ")
for reasoning_index, reasoning_delta in enumerate(reasoning_deltas):
    # Text completion chunks include whitespace, so we need to add it back in.
    # The space must be re-attached to reasoning_delta itself: assigning it
    # to answer_delta discards it and drops the separator between words.
    if reasoning_index > 0:
        reasoning_delta = " " + reasoning_delta
    self.chunks.append(
        openai.types.chat.ChatCompletionChunk(
            id="test-123",
            object="chat.completion.chunk",
            choices=[
                openai.types.chat.chat_completion_chunk.Choice(
                    delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(
                        role=None, reasoning_content=reasoning_delta
                    ),
                    finish_reason=None,
                    index=0,
                    logprobs=None,
                    # Only Azure includes content_filter_results
                    content_filter_results={
                        "hate": {"filtered": False, "severity": "safe"},
                        "self_harm": {"filtered": False, "severity": "safe"},
                        "sexual": {"filtered": False, "severity": "safe"},
                        "violence": {"filtered": False, "severity": "safe"},
                    },
                )
            ],
            created=1703462735,
            model="DeepSeek-R1",
        )
    )

answer_deltas = answer.split(" ")
for answer_index, answer_delta in enumerate(answer_deltas):
# Text completion chunks include whitespace, so we need to add it back in
if answer_index > 0 and answer_delta != "</think>":
if answer_index > 0:
answer_delta = " " + answer_delta
self.chunks.append(
openai.types.chat.ChatCompletionChunk(
Expand Down Expand Up @@ -95,9 +126,9 @@ async def mock_acreate(*args, **kwargs):
# Only mock a stream=True completion
last_message = kwargs.get("messages")[-1]["content"]
if last_message == "What is the capital of France?":
return AsyncChatCompletionIterator("<think> hmm </think> The capital of France is Paris.")
return AsyncChatCompletionIterator("hmm", "The capital of France is Paris.")
elif last_message == "What is the capital of Germany?":
return AsyncChatCompletionIterator("<think> hmm </think> The capital of Germany is Berlin.")
return AsyncChatCompletionIterator("hmm", "The capital of Germany is Berlin.")
else:
raise ValueError(f"Unexpected message: {last_message}")

Expand Down
15 changes: 8 additions & 7 deletions tests/snapshots/test_app/test_chat_stream_text/result.jsonlines
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}}
{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " France", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " Paris.", "reasoning_content": null, "role": "assistant"}}
{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" France","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" Paris.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}}
{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " Germany", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}}
{"delta": {"content": " Berlin.", "reasoning_content": null, "role": "assistant"}}
{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" Germany","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":" Berlin.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}
{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}
Loading