Port back to OpenAI SDK

pamelafox · pamelafox · commit 2f07eda08007 · 2025-03-12T13:12:18.000-07:00
diff --git a/src/pyproject.toml b/src/pyproject.toml
@@ -13,8 +13,8 @@ dependencies = [
     "httptools",
     # Used by uvicorn for reload functionality
     "watchfiles",
-    "azure-ai-inference",
     "azure-identity",
+    "openai",
     "aiohttp",
     "python-dotenv",
     "pyyaml"
diff --git a/src/quartapp/chat.py b/src/quartapp/chat.py
@@ -1,13 +1,9 @@
 import json
 import os
+import time
 
-from azure.ai.inference.aio import ChatCompletionsClient
-from azure.ai.inference.models import SystemMessage
-from azure.identity.aio import (
-    AzureDeveloperCliCredential,
-    ChainedTokenCredential,
-    ManagedIdentityCredential,
-)
+from azure.identity.aio import AzureDeveloperCliCredential, ManagedIdentityCredential
+from openai import AsyncOpenAI
 from quart import (
     Blueprint,
     Response,
@@ -22,45 +18,47 @@
 
 @bp.before_app_serving
 async def configure_openai():
-    # Use ManagedIdentityCredential with the client_id for user-assigned managed identities
-    user_assigned_managed_identity_credential = ManagedIdentityCredential(client_id=os.getenv("AZURE_CLIENT_ID"))
-
-    # Use AzureDeveloperCliCredential with the current tenant.
-    azure_dev_cli_credential = AzureDeveloperCliCredential(tenant_id=os.getenv("AZURE_TENANT_ID"), process_timeout=60)
-
-    # Create a ChainedTokenCredential with ManagedIdentityCredential and AzureDeveloperCliCredential
-    #  - ManagedIdentityCredential is used for deployment on Azure Container Apps
-
-    #  - AzureDeveloperCliCredential is used for local development
-    # The order of the credentials is important, as the first valid token is used
-    # For more information check out:
-
-    # https://learn.microsoft.com/azure/developer/python/sdk/authentication/credential-chains?tabs=ctc#chainedtokencredential-overview
-    azure_credential = ChainedTokenCredential(user_assigned_managed_identity_credential, azure_dev_cli_credential)
-    current_app.logger.info("Using Azure OpenAI with credential")
-
-    if not os.getenv("AZURE_INFERENCE_ENDPOINT"):
-        raise ValueError("AZURE_INFERENCE_ENDPOINT is required for Azure OpenAI")
+    if os.getenv("RUNNING_IN_PRODUCTION"):
+        client_id = os.environ["AZURE_CLIENT_ID"]
+        current_app.logger.info("Using Azure OpenAI with managed identity credential for client ID: %s", client_id)
+        bp.azure_credential = ManagedIdentityCredential(client_id=client_id)
+    else:
+        tenant_id = os.environ["AZURE_TENANT_ID"]
+        current_app.logger.info("Using Azure OpenAI with Azure Developer CLI credential for tenant ID: %s", tenant_id)
+        bp.azure_credential = AzureDeveloperCliCredential(tenant_id=tenant_id)
+
+    # Get an access token for Azure OpenAI from the selected Azure credential
+    bp.openai_token = await bp.azure_credential.get_token("https://cognitiveservices.azure.com/.default")
 
     # Create the Asynchronous Azure OpenAI client
-    bp.ai_client = ChatCompletionsClient(
-        endpoint=os.environ["AZURE_INFERENCE_ENDPOINT"],
-        credential=azure_credential,
-        credential_scopes=["https://cognitiveservices.azure.com/.default"],
-        model="DeepSeek-R1",
+    bp.openai_client = AsyncOpenAI(
+        base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
+        api_key=bp.openai_token.token,
+        default_query={"api-version": "2024-05-01-preview"},
     )
 
+    # Set the model name to the Azure OpenAI model deployment name
+    bp.openai_model = os.getenv("AZURE_DEEPSEEK_DEPLOYMENT")
+
 
 @bp.after_app_serving
 async def shutdown_openai():
-    await bp.ai_client.close()
+    await bp.openai_client.close()
 
 
 @bp.get("/")
 async def index():
     return await render_template("index.html")
 
 
+@bp.before_request
+async def maybe_refresh_token():  # runs before each request; renews the Azure OpenAI token when it is close to expiry
+    if bp.openai_token.expires_on < (time.time() + 60):  # refresh when less than 60 seconds of validity remain
+        current_app.logger.info("Token is expired, refreshing token.")
+        bp.openai_token = await bp.azure_credential.get_token("https://cognitiveservices.azure.com/.default")
+        bp.openai_client.api_key = bp.openai_token.token  # bp.openai_token now holds the new expiry for the next check
+
+
 @bp.post("/chat/stream")
 async def chat_handler():
     request_messages = (await request.get_json())["messages"]
@@ -69,15 +67,19 @@ async def chat_handler():
     async def response_stream():
         # This sends all messages, so API request may exceed token limits
         all_messages = [
-            SystemMessage(content="You are a helpful assistant."),
+            {"role": "system", "content": "You are a helpful assistant."},
         ] + request_messages
 
-        client: ChatCompletionsClient = bp.ai_client
-        result = await client.complete(messages=all_messages, max_tokens=2048, stream=True)
+        chat_coroutine = bp.openai_client.chat.completions.create(
+            # Azure OpenAI takes the deployment name as the model name
+            model=bp.openai_model,
+            messages=all_messages,
+            stream=True,
+        )
 
         try:
             is_thinking = False
-            async for update in result:
+            async for update in await chat_coroutine:
                 if update.choices:
                     content = update.choices[0].delta.content
                     if content == "<think>":
@@ -103,4 +105,4 @@ async def response_stream():
             current_app.logger.error(e)
             yield json.dumps({"error": str(e)}, ensure_ascii=False) + "\n"
 
-    return Response(response_stream(), mimetype="application/json")
+    return Response(response_stream())
diff --git a/src/requirements.txt b/src/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
 #    pip-compile --output-file=requirements.txt pyproject.toml
@@ -12,24 +12,28 @@ aiohttp==3.10.11
     # via quartapp (pyproject.toml)
 aiosignal==1.3.1
     # via aiohttp
+annotated-types==0.7.0
+    # via pydantic
 anyio==4.6.0
-    # via watchfiles
+    # via
+    #   httpx
+    #   openai
+    #   watchfiles
 attrs==24.2.0
     # via aiohttp
-azure-ai-inference==1.0.0b8
-    # via quartapp (pyproject.toml)
 azure-core==1.31.0
-    # via
-    #   azure-ai-inference
-    #   azure-identity
+    # via azure-identity
 azure-identity==1.19.0
     # via quartapp (pyproject.toml)
 blinker==1.8.2
     # via
     #   flask
     #   quart
 certifi==2024.8.30
-    # via requests
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
 cffi==1.17.1
     # via cryptography
 charset-normalizer==3.4.0
@@ -44,6 +48,8 @@ cryptography==44.0.1
     #   azure-identity
     #   msal
     #   pyjwt
+distro==1.9.0
+    # via openai
 flask==3.0.3
     # via quart
 frozenlist==1.4.1
@@ -54,26 +60,30 @@ gunicorn==23.0.0
     # via quartapp (pyproject.toml)
 h11==0.14.0
     # via
+    #   httpcore
     #   hypercorn
     #   uvicorn
     #   wsproto
 h2==4.1.0
     # via hypercorn
 hpack==4.0.0
     # via h2
+httpcore==1.0.7
+    # via httpx
 httptools==0.6.4
     # via quartapp (pyproject.toml)
+httpx==0.28.1
+    # via openai
 hypercorn==0.17.3
     # via quart
 hyperframe==6.0.1
     # via h2
 idna==3.10
     # via
     #   anyio
+    #   httpx
     #   requests
     #   yarl
-isodate==0.7.2
-    # via azure-ai-inference
 itsdangerous==2.2.0
     # via
     #   flask
@@ -82,6 +92,8 @@ jinja2==3.1.5
     # via
     #   flask
     #   quart
+jiter==0.9.0
+    # via openai
 markupsafe==3.0.1
     # via
     #   jinja2
@@ -97,6 +109,8 @@ multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
+openai==1.66.2
+    # via quartapp (pyproject.toml)
 packaging==24.1
     # via gunicorn
 portalocker==2.10.1
@@ -107,8 +121,14 @@ propcache==0.2.0
     # via yarl
 pycparser==2.22
     # via cffi
+pydantic==2.10.6
+    # via openai
+pydantic-core==2.27.2
+    # via pydantic
 pyjwt[crypto]==2.9.0
-    # via msal
+    # via
+    #   msal
+    #   pyjwt
 python-dotenv==1.0.1
     # via quartapp (pyproject.toml)
 pyyaml==6.0.2
@@ -122,12 +142,18 @@ requests==2.32.3
 six==1.16.0
     # via azure-core
 sniffio==1.3.1
-    # via anyio
+    # via
+    #   anyio
+    #   openai
+tqdm==4.67.1
+    # via openai
 typing-extensions==4.12.2
     # via
-    #   azure-ai-inference
     #   azure-core
     #   azure-identity
+    #   openai
+    #   pydantic
+    #   pydantic-core
 urllib3==2.2.3
     # via requests
 uvicorn==0.32.0