11import json
22import os
3+ import time
34
4- from azure .ai .inference .aio import ChatCompletionsClient
5- from azure .ai .inference .models import SystemMessage
6- from azure .identity .aio import (
7- AzureDeveloperCliCredential ,
8- ChainedTokenCredential ,
9- ManagedIdentityCredential ,
10- )
5+ from azure .identity .aio import AzureDeveloperCliCredential , ManagedIdentityCredential
6+ from openai import AsyncOpenAI
117from quart import (
128 Blueprint ,
139 Response ,
2218
@bp.before_app_serving
async def configure_openai():
    """Set up the Azure credential, access token, and OpenAI client before serving.

    Stores four attributes on the blueprint for use by request handlers:

    * ``bp.azure_credential`` - ``ManagedIdentityCredential`` when the
      ``RUNNING_IN_PRODUCTION`` environment variable is set, otherwise
      ``AzureDeveloperCliCredential``.
    * ``bp.openai_token`` - the access token fetched with that credential.
    * ``bp.openai_client`` - an ``AsyncOpenAI`` client authenticated with the
      token string.
    * ``bp.openai_model`` - the value of ``AZURE_DEEPSEEK_DEPLOYMENT`` (``None``
      if the variable is unset).

    Raises:
        KeyError: If ``AZURE_CLIENT_ID`` (production), ``AZURE_TENANT_ID``
            (otherwise), or ``AZURE_INFERENCE_ENDPOINT`` is not set.
    """
    if os.getenv("RUNNING_IN_PRODUCTION"):
        client_id = os.environ["AZURE_CLIENT_ID"]
        current_app.logger.info("Using Azure OpenAI with managed identity credential for client ID: %s", client_id)
        bp.azure_credential = ManagedIdentityCredential(client_id=client_id)
    else:
        tenant_id = os.environ["AZURE_TENANT_ID"]
        current_app.logger.info("Using Azure OpenAI with Azure Developer CLI credential for tenant ID: %s", tenant_id)
        bp.azure_credential = AzureDeveloperCliCredential(tenant_id=tenant_id)

    # Fetch an access token with the selected credential. The whole token
    # object is kept (not just the string) so its expiry can be checked later.
    bp.openai_token = await bp.azure_credential.get_token("https://cognitiveservices.azure.com/.default")

    # Create the asynchronous OpenAI client, passing the token string as the API key
    bp.openai_client = AsyncOpenAI(
        base_url=os.environ["AZURE_INFERENCE_ENDPOINT"],
        api_key=bp.openai_token.token,
        default_query={"api-version": "2024-05-01-preview"},
    )

    # Set the model name to the Azure OpenAI model deployment name
    bp.openai_model = os.getenv("AZURE_DEEPSEEK_DEPLOYMENT")
42+
5343
@bp.after_app_serving
async def shutdown_openai():
    """Close the OpenAI client and the Azure credential when the app stops serving."""
    try:
        await bp.openai_client.close()
    finally:
        # The async Azure credential is a separate closable resource from the
        # OpenAI client; close it even if closing the client raised.
        await bp.azure_credential.close()
5747
5848
@bp.get("/")
async def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = await render_template("index.html")
    return page
6252
6353
@bp.before_request
async def maybe_refresh_token():
    """Refresh the access token before a request if it expires within 60 seconds.

    The new token is stored back on ``bp.openai_token`` as well as on the
    client. The expiry check reads ``bp.openai_token.expires_on``, so without
    the write-back the check would stay true after the first expiry and every
    request would fetch a new token.
    """
    if bp.openai_token.expires_on < (time.time() + 60):
        current_app.logger.info("Token is expired, refreshing token.")
        bp.openai_token = await bp.azure_credential.get_token("https://cognitiveservices.azure.com/.default")
        bp.openai_client.api_key = bp.openai_token.token
60+
61+
6462@bp .post ("/chat/stream" )
6563async def chat_handler ():
6664 request_messages = (await request .get_json ())["messages" ]
@@ -69,15 +67,19 @@ async def chat_handler():
6967 async def response_stream ():
7068 # This sends all messages, so API request may exceed token limits
7169 all_messages = [
72- SystemMessage ( content = " You are a helpful assistant.") ,
70+ { "role" : "system" , " content" : " You are a helpful assistant."} ,
7371 ] + request_messages
7472
75- client : ChatCompletionsClient = bp .ai_client
76- result = await client .complete (messages = all_messages , max_tokens = 2048 , stream = True )
73+ chat_coroutine = bp .openai_client .chat .completions .create (
74+ # Azure Open AI takes the deployment name as the model name
75+ model = bp .openai_model ,
76+ messages = all_messages ,
77+ stream = True ,
78+ )
7779
7880 try :
7981 is_thinking = False
80- async for update in result :
82+ async for update in await chat_coroutine :
8183 if update .choices :
8284 content = update .choices [0 ].delta .content
8385 if content == "<think>" :
@@ -103,4 +105,4 @@ async def response_stream():
103105 current_app .logger .error (e )
104106 yield json .dumps ({"error" : str (e )}, ensure_ascii = False ) + "\n "
105107
106- return Response (response_stream (), mimetype = "application/json" )
108+ return Response (response_stream ())
0 commit comments