-import azure.ai.inference.models
+import openai
 import pytest
 import pytest_asyncio
 
@@ -13,54 +13,70 @@ class AsyncChatCompletionIterator:
     def __init__(self, answer: str):
         self.chunk_index = 0
         self.chunks = [
-            azure.ai.inference.models.StreamingChatCompletionsUpdate(
-                id="test-123",
-                created=1703462735,
-                model="DeepSeek-R1",
-                choices=[
-                    azure.ai.inference.models.StreamingChatChoiceUpdate(
-                        delta=azure.ai.inference.models.StreamingChatResponseMessageUpdate(
-                            content=None, role="assistant"
-                        ),
-                        index=0,
-                        finish_reason=None,
-                    )
+            openai.types.chat.ChatCompletionChunk(
+                object="chat.completion.chunk",
+                choices=[],
+                id="",
+                created=0,
+                model="",
+                prompt_filter_results=[
+                    {
+                        "prompt_index": 0,
+                        "content_filter_results": {
+                            "hate": {"filtered": False, "severity": "safe"},
+                            "self_harm": {"filtered": False, "severity": "safe"},
+                            "sexual": {"filtered": False, "severity": "safe"},
+                            "violence": {"filtered": False, "severity": "safe"},
+                        },
+                    }
                 ],
-            ),
+            )
         ]
         answer_deltas = answer.split(" ")
         for answer_index, answer_delta in enumerate(answer_deltas):
-            # Completion chunks include whitespace, so we need to add it back in
-            if answer_index > 0:
+            # Text completion chunks include whitespace, so we need to add it back in
+            if answer_index > 0 and answer_delta != "</think>":
                 answer_delta = " " + answer_delta
             self.chunks.append(
-                azure.ai.inference.models.StreamingChatCompletionsUpdate(
+                openai.types.chat.ChatCompletionChunk(
                     id="test-123",
-                    created=1703462735,
-                    model="DeepSeek-R1",
+                    object="chat.completion.chunk",
                     choices=[
-                        azure.ai.inference.models.StreamingChatChoiceUpdate(
-                            delta=azure.ai.inference.models.StreamingChatResponseMessageUpdate(
-                                content=answer_delta, role=None
+                        openai.types.chat.chat_completion_chunk.Choice(
+                            delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(
+                                role=None, content=answer_delta
                             ),
-                            index=0,
                             finish_reason=None,
+                            index=0,
+                            logprobs=None,
+                            # Only Azure includes content_filter_results
+                            content_filter_results={
+                                "hate": {"filtered": False, "severity": "safe"},
+                                "self_harm": {"filtered": False, "severity": "safe"},
+                                "sexual": {"filtered": False, "severity": "safe"},
+                                "violence": {"filtered": False, "severity": "safe"},
+                            },
                         )
                     ],
+                    created=1703462735,
+                    model="DeepSeek-R1",
                 )
             )
         self.chunks.append(
-            azure.ai.inference.models.StreamingChatCompletionsUpdate(
+            openai.types.chat.ChatCompletionChunk(
                 id="test-123",
-                created=1703462735,
-                model="DeepSeek-R1",
+                object="chat.completion.chunk",
                 choices=[
-                    azure.ai.inference.models.StreamingChatChoiceUpdate(
-                        delta=azure.ai.inference.models.StreamingChatResponseMessageUpdate(content=None, role=None),
+                    openai.types.chat.chat_completion_chunk.Choice(
+                        delta=openai.types.chat.chat_completion_chunk.ChoiceDelta(content=None, role=None),
                         index=0,
                         finish_reason="stop",
+                        # Only Azure includes content_filter_results
+                        content_filter_results={},
                     )
                 ],
+                created=1703462735,
+                model="DeepSeek-R1",
             )
         )
 
@@ -75,28 +91,29 @@ async def __anext__(self):
         else:
             raise StopAsyncIteration
 
-    async def mock_complete(*args, **kwargs):
+    async def mock_acreate(*args, **kwargs):
         # Only mock a stream=True completion
         last_message = kwargs.get("messages")[-1]["content"]
         if last_message == "What is the capital of France?":
-            return AsyncChatCompletionIterator("The capital of France is Paris.")
+            return AsyncChatCompletionIterator("<think> hmm </think> The capital of France is Paris.")
         elif last_message == "What is the capital of Germany?":
-            return AsyncChatCompletionIterator("The capital of Germany is Berlin.")
+            return AsyncChatCompletionIterator("<think> hmm </think> The capital of Germany is Berlin.")
         else:
             raise ValueError(f"Unexpected message: {last_message}")
 
-    monkeypatch.setattr("azure.ai.inference.aio.ChatCompletionsClient.complete", mock_complete)
+    monkeypatch.setattr("openai.resources.chat.AsyncCompletions.create", mock_acreate)
 
 
 @pytest.fixture
 def mock_defaultazurecredential(monkeypatch):
-    monkeypatch.setattr("azure.identity.aio.DefaultAzureCredential", mock_cred.MockAzureCredential)
+    monkeypatch.setattr("azure.identity.aio.AzureDeveloperCliCredential", mock_cred.MockAzureCredential)
     monkeypatch.setattr("azure.identity.aio.ManagedIdentityCredential", mock_cred.MockAzureCredential)
 
 
 @pytest_asyncio.fixture
 async def client(monkeypatch, mock_openai_chatcompletion, mock_defaultazurecredential):
     monkeypatch.setenv("AZURE_INFERENCE_ENDPOINT", "test-deepseek-service.ai.azure.com")
+    monkeypatch.setenv("AZURE_TENANT_ID", "test-tenant-id")
 
     quart_app = quartapp.create_app(testing=True)
 