diff --git a/.github/workflows/python-check.yaml b/.github/workflows/python-check.yaml index 5dbea0c..040b097 100644 --- a/.github/workflows/python-check.yaml +++ b/.github/workflows/python-check.yaml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-20.04", "windows-latest"] + os: ["ubuntu-latest", "windows-latest"] python_version: ["3.11"] steps: - uses: actions/checkout@v4 diff --git a/infra/main.bicep b/infra/main.bicep index cdf701e..24c3b44 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -123,7 +123,7 @@ module aca 'aca.bicep' = { containerAppsEnvironmentName: containerApps.outputs.environmentName containerRegistryName: containerApps.outputs.registryName aiServicesDeploymentName: aiServicesDeploymentName - aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com/models' + aiServicesEndpoint: 'https://${aiServices.outputs.name}.services.ai.azure.com' exists: acaExists } } @@ -167,7 +167,7 @@ output AZURE_LOCATION string = location output AZURE_TENANT_ID string = tenant().tenantId output AZURE_DEEPSEEK_DEPLOYMENT string = aiServicesDeploymentName -output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com/models' +output AZURE_INFERENCE_ENDPOINT string = 'https://${aiServices.outputs.name}.services.ai.azure.com' output SERVICE_ACA_IDENTITY_PRINCIPAL_ID string = aca.outputs.identityPrincipalId output SERVICE_ACA_NAME string = aca.outputs.name diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py index 9d9de44..5643d43 100644 --- a/src/quartapp/chat.py +++ b/src/quartapp/chat.py @@ -1,9 +1,8 @@ import json import os -import httpx from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential, get_bearer_token_provider -from openai import AsyncOpenAI, DefaultAsyncHttpxClient +from openai import AsyncAzureOpenAI from quart import ( Blueprint, Response, @@ -32,21 +31,11 @@ async def configure_openai(): bp.azure_credential, 
"https://cognitiveservices.azure.com/.default" ) - class TokenBasedAuth(httpx.Auth): - async def async_auth_flow(self, request): - token = await openai_token_provider() - request.headers["Authorization"] = f"Bearer {token}" - yield request - - def sync_auth_flow(self, request): - raise RuntimeError("Cannot use a sync authentication class with httpx.AsyncClient") - # Create the Asynchronous Azure OpenAI client - bp.openai_client = AsyncOpenAI( - base_url=os.environ["AZURE_INFERENCE_ENDPOINT"], - api_key="placeholder", - default_query={"api-version": "2024-05-01-preview"}, - http_client=DefaultAsyncHttpxClient(auth=TokenBasedAuth()), + bp.openai_client = AsyncAzureOpenAI( + azure_endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"], + azure_ad_token_provider=openai_token_provider, + api_version="2025-04-01-preview", # temporary ) # Set the model name to the Azure OpenAI model deployment name @@ -82,29 +71,9 @@ async def response_stream(): ) try: - is_thinking = False async for update in await chat_coroutine: if update.choices: - content = update.choices[0].delta.content - if content == "": - is_thinking = True - update.choices[0].delta.content = None - update.choices[0].delta.reasoning_content = "" - elif content == "": - is_thinking = False - update.choices[0].delta.content = None - update.choices[0].delta.reasoning_content = "" - elif content: - if is_thinking: - yield json.dumps( - {"delta": {"content": None, "reasoning_content": content, "role": "assistant"}}, - ensure_ascii=False, - ) + "\n" - else: - yield json.dumps( - {"delta": {"content": content, "reasoning_content": None, "role": "assistant"}}, - ensure_ascii=False, - ) + "\n" + yield update.choices[0].model_dump_json() + "\n" except Exception as e: current_app.logger.error(e) yield json.dumps({"error": str(e)}, ensure_ascii=False) + "\n" diff --git a/src/quartapp/templates/index.html b/src/quartapp/templates/index.html index 7430215..527d3e8 100644 --- a/src/quartapp/templates/index.html +++ 
b/src/quartapp/templates/index.html @@ -120,7 +120,7 @@ messageDiv.querySelector(".thoughts").style.display = "block"; messageDiv.querySelector(".thoughts-content").innerHTML = converter.makeHtml(thoughts); } - } else { + } else if (event.delta.content) { messageDiv.querySelector(".loading-bar").style.display = "none"; answer += event.delta.content; messageDiv.querySelector(".answer-content").innerHTML = converter.makeHtml(answer); diff --git a/src/requirements.txt b/src/requirements.txt index 7bf7067..b038813 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -38,7 +38,7 @@ cffi==1.17.1 # via cryptography charset-normalizer==3.4.0 # via requests -click==8.1.7 +click==8.2.1 # via # flask # quart @@ -58,7 +58,7 @@ frozenlist==1.4.1 # aiosignal gunicorn==23.0.0 # via quartapp (pyproject.toml) -h11==0.14.0 +h11==0.16.0 # via # httpcore # hypercorn @@ -68,7 +68,7 @@ h2==4.1.0 # via hypercorn hpack==4.0.0 # via h2 -httpcore==1.0.7 +httpcore==1.0.9 # via httpx httptools==0.6.4 # via quartapp (pyproject.toml) @@ -156,7 +156,7 @@ typing-extensions==4.12.2 # pydantic-core urllib3==2.2.3 # via requests -uvicorn==0.32.0 +uvicorn==0.34.2 # via quartapp (pyproject.toml) uvloop==0.20.0 ; sys_platform != "win32" and (sys_platform != "cygwin" and platform_python_implementation != "PyPy") # via quartapp (pyproject.toml) diff --git a/tests/conftest.py b/tests/conftest.py index c5cbac0..ba516b2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ @pytest.fixture def mock_openai_chatcompletion(monkeypatch): class AsyncChatCompletionIterator: - def __init__(self, answer: str): + def __init__(self, reasoning: str, answer: str): self.chunk_index = 0 self.chunks = [ openai.types.chat.ChatCompletionChunk( @@ -32,10 +32,41 @@ def __init__(self, answer: str): ], ) ] + reasoning_deltas = reasoning.split(" ") + for reasoning_index, reasoning_delta in enumerate(reasoning_deltas): + # Text completion chunks include whitespace, so we need to add it back in + if
reasoning_index > 0: + reasoning_delta = " " + reasoning_delta + self.chunks.append( + openai.types.chat.ChatCompletionChunk( + id="test-123", + object="chat.completion.chunk", + choices=[ + openai.types.chat.chat_completion_chunk.Choice( + delta=openai.types.chat.chat_completion_chunk.ChoiceDelta( + role=None, reasoning_content=reasoning_delta + ), + finish_reason=None, + index=0, + logprobs=None, + # Only Azure includes content_filter_results + content_filter_results={ + "hate": {"filtered": False, "severity": "safe"}, + "self_harm": {"filtered": False, "severity": "safe"}, + "sexual": {"filtered": False, "severity": "safe"}, + "violence": {"filtered": False, "severity": "safe"}, + }, + ) + ], + created=1703462735, + model="DeepSeek-R1", + ) + ) + answer_deltas = answer.split(" ") for answer_index, answer_delta in enumerate(answer_deltas): # Text completion chunks include whitespace, so we need to add it back in - if answer_index > 0 and answer_delta != "": + if answer_index > 0: answer_delta = " " + answer_delta self.chunks.append( openai.types.chat.ChatCompletionChunk( @@ -95,9 +126,9 @@ async def mock_acreate(*args, **kwargs): # Only mock a stream=True completion last_message = kwargs.get("messages")[-1]["content"] if last_message == "What is the capital of France?": - return AsyncChatCompletionIterator(" hmm The capital of France is Paris.") + return AsyncChatCompletionIterator("hmm", "The capital of France is Paris.") elif last_message == "What is the capital of Germany?": - return AsyncChatCompletionIterator(" hmm The capital of Germany is Berlin.") + return AsyncChatCompletionIterator("hmm", "The capital of Germany is Berlin.") else: raise ValueError(f"Unexpected message: {last_message}") diff --git a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines index 50730aa..f81bde8 100644 --- a/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines +++
b/tests/snapshots/test_app/test_chat_stream_text/result.jsonlines @@ -1,7 +1,8 @@ -{"delta": {"content": null, "reasoning_content": " hmm", "role": "assistant"}} -{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " France", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " Paris.", "reasoning_content": null, "role": "assistant"}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" 
of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" France","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" Paris.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}} diff --git a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines index 42d9a6c..2d41c6e 100644 --- a/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines +++ b/tests/snapshots/test_app/test_chat_stream_text_history/result.jsonlines @@ -1,7 +1,8 @@ -{"delta": {"content": null, "reasoning_content": " hmm", 
"role": "assistant"}} -{"delta": {"content": " The", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " capital", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " of", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " Germany", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " is", "reasoning_content": null, "role": "assistant"}} -{"delta": {"content": " Berlin.", "reasoning_content": null, "role": "assistant"}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null,"reasoning_content":"hmm"},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":"The","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" capital","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" of","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" 
Germany","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" is","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":" Berlin.","function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":null,"index":0,"logprobs":null,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}} +{"delta":{"content":null,"function_call":null,"refusal":null,"role":null,"tool_calls":null},"finish_reason":"stop","index":0,"logprobs":null,"content_filter_results":{}}