Skip to content

Commit 1cff62f

Browse files
authored
Merge pull request #2978 from MicrosoftDocs/main
2/14/2025 PM Publish
2 parents b07793e + 0d32529 commit 1cff62f

39 files changed

+804
-493
lines changed

.whatsnew.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"$schema": "https://whatsnewapi.azurewebsites.net/schema",
3+
"docSetProductName": "Azure AI Services",
4+
"rootDirectory": "articles/",
5+
"docLinkSettings": {
6+
"linkFormat": "relative",
7+
"relativeLinkPrefix": "../"
8+
},
9+
"navigationOptions": {
10+
"maximumNumberOfArticles": 5,
11+
"tocParentNode": "Latest documentation updates",
12+
"repoTocFolder": "articles/ai-services",
13+
"indexParentNode": "Latest documentation updates",
14+
"repoIndexFolder": "articles/ai-services"
15+
},
16+
"areas": [
17+
{
18+
"names": [ "ai-services" ],
19+
"heading": "Azure AI Services"
20+
},
21+
{
22+
"names": [ "ai-studio" ],
23+
"heading": "Azure AI Foundry"
24+
},
25+
{
26+
"names": [ "machine-learning" ],
27+
"heading": "Azure Machine Learning"
28+
},
29+
{
30+
"names": [ "search" ],
31+
"heading": "Azure AI Search"
32+
33+
}
34+
]
35+
}

articles/ai-foundry/model-inference/how-to/use-embeddings.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,5 @@ zone_pivot_groups: azure-ai-inference-samples
4848

4949
## Related content
5050

51-
* [Use embeddings models](use-embeddings.md)
51+
* [Use image embeddings models](use-image-embeddings.md)
5252
* [Azure AI Model Inference API](.././reference/reference-model-inference-api.md)

articles/ai-foundry/model-inference/includes/code-create-chat-client-entra.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ client = ChatCompletionsClient(
2626
endpoint="https://<resource>.services.ai.azure.com/models",
2727
credential=DefaultAzureCredential(),
2828
credential_scopes=["https://cognitiveservices.azure.com/.default"],
29-
model="mistral-large-2407",
3029
)
3130
```
3231

@@ -50,7 +49,6 @@ const clientOptions = { credentials: { "https://cognitiveservices.azure.com" } }
5049
const client = new ModelClient(
5150
"https://<resource>.services.ai.azure.com/models",
5251
new DefaultAzureCredential(),
53-
"mistral-large-2407",
5452
clientOptions,
5553
);
5654
```
@@ -80,15 +78,14 @@ using Azure.AI.Inference;
8078
Then, you can use the package to consume the model. The following example shows how to create a client to consume chat completions with Entra ID:
8179

8280
```csharp
83-
var credential = new DefaultAzureCredential();
81+
TokenCredential credential = new DefaultAzureCredential();
8482
AzureAIInferenceClientOptions clientOptions = new AzureAIInferenceClientOptions();
8583
BearerTokenAuthenticationPolicy tokenPolicy = new BearerTokenAuthenticationPolicy(credential, new string[] { "https://cognitiveservices.azure.com/.default" });
8684
clientOptions.AddPolicy(tokenPolicy, HttpPipelinePosition.PerRetry);
8785

8886
ChatCompletionsClient client = new ChatCompletionsClient(
8987
new Uri("https://<resource>.services.ai.azure.com/models"),
9088
credential,
91-
"mistral-large-2407",
9289
clientOptions
9390
);
9491
```
@@ -117,7 +114,6 @@ TokenCredential defaultCredential = new DefaultAzureCredentialBuilder().build();
117114
ChatCompletionsClient client = new ChatCompletionsClientBuilder()
118115
.credential(defaultCredential)
119116
.endpoint("https://<resource>.services.ai.azure.com/models")
120-
.model("mistral-large-2407")
121117
.buildClient();
122118
```
123119

articles/ai-foundry/model-inference/includes/use-chat-completions/csharp.md

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,23 @@ First, create the client to consume the model. The following code uses an endpoi
4747
ChatCompletionsClient client = new ChatCompletionsClient(
4848
new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
4949
new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL")),
50-
"mistral-large-2407"
5150
);
5251
```
5352

5453
If you have configured the resource with **Microsoft Entra ID** support, you can use the following code snippet to create a client.
5554

5655

5756
```csharp
57+
TokenCredential credential = new DefaultAzureCredential(includeInteractiveCredentials: true);
58+
AzureAIInferenceClientOptions clientOptions = new AzureAIInferenceClientOptions();
59+
BearerTokenAuthenticationPolicy tokenPolicy = new BearerTokenAuthenticationPolicy(credential, new string[] { "https://cognitiveservices.azure.com/.default" });
60+
61+
clientOptions.AddPolicy(tokenPolicy, HttpPipelinePosition.PerRetry);
62+
5863
client = new ChatCompletionsClient(
5964
new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
60-
new DefaultAzureCredential(includeInteractiveCredentials: true),
61-
"mistral-large-2407"
65+
credential,
66+
clientOptions,
6267
);
6368
```
6469
@@ -73,6 +78,7 @@ ChatCompletionsOptions requestOptions = new ChatCompletionsOptions()
7378
new ChatRequestSystemMessage("You are a helpful assistant."),
7479
new ChatRequestUserMessage("How many languages are in the world?")
7580
},
81+
Model = "mistral-large-2407",
7682
};
7783
7884
Response<ChatCompletions> response = client.Complete(requestOptions);
@@ -122,7 +128,8 @@ static async Task StreamMessageAsync(ChatCompletionsClient client)
122128
new ChatRequestSystemMessage("You are a helpful assistant."),
123129
new ChatRequestUserMessage("How many languages are in the world? Write an essay about it.")
124130
},
125-
MaxTokens=4096
131+
MaxTokens=4096,
132+
Model = "mistral-large-2407",
126133
};
127134
128135
StreamingResponse<StreamingChatCompletionsUpdate> streamResponse = await client.CompleteStreamingAsync(requestOptions);
@@ -168,6 +175,7 @@ requestOptions = new ChatCompletionsOptions()
168175
new ChatRequestSystemMessage("You are a helpful assistant."),
169176
new ChatRequestUserMessage("How many languages are in the world?")
170177
},
178+
Model = "mistral-large-2407",
171179
PresencePenalty = 0.1f,
172180
FrequencyPenalty = 0.8f,
173181
MaxTokens = 2048,
@@ -202,7 +210,8 @@ requestOptions = new ChatCompletionsOptions()
202210
"How many languages are in the world?"
203211
)
204212
},
205-
ResponseFormat = new ChatCompletionsResponseFormatJSON()
213+
ResponseFormat = new ChatCompletionsResponseFormatJsonObject(),
214+
Model = "mistral-large-2407",
206215
};
207216
208217
response = client.Complete(requestOptions);
@@ -221,6 +230,7 @@ requestOptions = new ChatCompletionsOptions()
221230
new ChatRequestSystemMessage("You are a helpful assistant."),
222231
new ChatRequestUserMessage("How many languages are in the world?")
223232
},
233+
Model = "mistral-large-2407",
224234
AdditionalProperties = { { "logprobs", BinaryData.FromString("true") } },
225235
};
226236
@@ -295,7 +305,7 @@ var chatHistory = new List<ChatRequestMessage>(){
295305
new ChatRequestUserMessage("When is the next flight from Miami to Seattle?")
296306
};
297307
298-
requestOptions = new ChatCompletionsOptions(chatHistory);
308+
requestOptions = new ChatCompletionsOptions(chatHistory, model: "mistral-large-2407");
299309
requestOptions.Tools.Add(getFlightTool);
300310
requestOptions.ToolChoice = ChatCompletionsToolChoice.Auto;
301311
@@ -377,6 +387,7 @@ try
377387
"Chopping tomatoes and cutting them into cubes or wedges are great ways to practice your knife skills."
378388
),
379389
},
390+
Model = "mistral-large-2407",
380391
};
381392
382393
response = client.Complete(requestOptions);
@@ -436,6 +447,7 @@ ChatCompletionsOptions requestOptions = new ChatCompletionsOptions()
436447
]),
437448
},
438449
MaxTokens=2048,
450+
Model = "phi-3.5-vision-instruct",
439451
};
440452
441453
var response = client.Complete(requestOptions);
@@ -456,7 +468,7 @@ Console.WriteLine($"\tCompletion tokens: {response.Value.Usage.CompletionTokens}
456468
457469
```console
458470
ASSISTANT: The chart illustrates that larger models tend to perform better in quality, as indicated by their size in billions of parameters. However, there are exceptions to this trend, such as Phi-3-medium and Phi-3-small, which outperform smaller models in quality. This suggests that while larger models generally have an advantage, there might be other factors at play that influence a model's performance.
459-
Model: mistral-large-2407
471+
Model: phi-3.5-vision-instruct
460472
Usage:
461473
Prompt tokens: 2380
462474
Completion tokens: 126

articles/ai-foundry/model-inference/includes/use-chat-reasoning/csharp.md

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ First, create the client to consume the model. The following code uses an endpoi
4242
```csharp
4343
ChatCompletionsClient client = new ChatCompletionsClient(
4444
new Uri("https://<resource>.services.ai.azure.com/models"),
45-
new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL")),
46-
"DeepSeek-R1"
45+
new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL"))
4746
);
4847
```
4948

@@ -53,10 +52,16 @@ ChatCompletionsClient client = new ChatCompletionsClient(
5352
If you have configured the resource with **Microsoft Entra ID** support, you can use the following code snippet to create a client.
5453
5554
```csharp
55+
TokenCredential credential = new DefaultAzureCredential(includeInteractiveCredentials: true);
56+
AzureAIInferenceClientOptions clientOptions = new AzureAIInferenceClientOptions();
57+
BearerTokenAuthenticationPolicy tokenPolicy = new BearerTokenAuthenticationPolicy(credential, new string[] { "https://cognitiveservices.azure.com/.default" });
58+
59+
clientOptions.AddPolicy(tokenPolicy, HttpPipelinePosition.PerRetry);
60+
5661
client = new ChatCompletionsClient(
5762
new Uri("https://<resource>.services.ai.azure.com/models"),
58-
new DefaultAzureCredential(includeInteractiveCredentials: true),
59-
"DeepSeek-R1"
63+
credential,
64+
clientOptions,
6065
);
6166
```
6267
@@ -70,6 +75,7 @@ ChatCompletionsOptions requestOptions = new ChatCompletionsOptions()
7075
Messages = {
7176
new ChatRequestUserMessage("How many languages are in the world?")
7277
},
78+
Model = "deepseek-r1",
7379
};
7480
7581
Response<ChatCompletions> response = client.Complete(requestOptions);
@@ -148,7 +154,8 @@ static async Task StreamMessageAsync(ChatCompletionsClient client)
148154
Messages = {
149155
new ChatRequestUserMessage("How many languages are in the world?")
150156
},
151-
MaxTokens=4096
157+
MaxTokens=4096,
158+
Model = "deepseek-r1",
152159
};
153160
154161
StreamingResponse<StreamingChatCompletionsUpdate> streamResponse = await client.CompleteStreamingAsync(requestOptions);
@@ -225,6 +232,7 @@ try
225232
"Chopping tomatoes and cutting them into cubes or wedges are great ways to practice your knife skills."
226233
),
227234
},
235+
Model = "deepseek-r1",
228236
};
229237
230238
response = client.Complete(requestOptions);

articles/ai-foundry/model-inference/includes/use-embeddings/csharp.md

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,23 @@ First, create the client to consume the model. The following code uses an endpoi
4949
```csharp
5050
EmbeddingsClient client = new EmbeddingsClient(
5151
new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
52-
new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL")),
53-
"text-embedding-3-small"
52+
new AzureKeyCredential(Environment.GetEnvironmentVariable("AZURE_INFERENCE_CREDENTIAL"))
5453
);
5554
```
5655

57-
If you have configured the resource to with **Microsoft Entra ID** support, you can use the following code snippet to create a client.
58-
56+
If you configured the resource with **Microsoft Entra ID** support, you can use the following code snippet to create a client. Note that here `includeInteractiveCredentials` is set to `true` only for demonstration purposes, so that authentication can happen through the web browser. In production workloads, you should remove this parameter.
5957

6058
```csharp
59+
TokenCredential credential = new DefaultAzureCredential(includeInteractiveCredentials: true);
60+
AzureAIInferenceClientOptions clientOptions = new AzureAIInferenceClientOptions();
61+
BearerTokenAuthenticationPolicy tokenPolicy = new BearerTokenAuthenticationPolicy(credential, new string[] { "https://cognitiveservices.azure.com/.default" });
62+
63+
clientOptions.AddPolicy(tokenPolicy, HttpPipelinePosition.PerRetry);
64+
6165
client = new EmbeddingsClient(
62-
new Uri(Environment.GetEnvironmentVariable("AZURE_INFERENCE_ENDPOINT")),
63-
new DefaultAzureCredential(includeInteractiveCredentials: true),
64-
"text-embedding-3-small"
66+
new Uri("https://<resource>.services.ai.azure.com/models"),
67+
credential,
68+
clientOptions,
6569
);
6670
```
6771
@@ -75,6 +79,7 @@ EmbeddingsOptions requestOptions = new EmbeddingsOptions()
7579
Input = {
7680
"The ultimate answer to the question of life"
7781
},
82+
Model = "text-embedding-3-small"
7883
};
7984
8085
Response<EmbeddingsResult> response = client.Embed(requestOptions);
@@ -104,6 +109,7 @@ EmbeddingsOptions requestOptions = new EmbeddingsOptions()
104109
"The ultimate answer to the question of life",
105110
"The largest planet in our solar system is Jupiter"
106111
},
112+
Model = "text-embedding-3-small"
107113
};
108114
109115
Response<EmbeddingsResult> response = client.Embed(requestOptions);
@@ -129,7 +135,12 @@ The following example shows how to create embeddings that are used to create an
129135
var input = new List<string> {
130136
"The answer to the ultimate question of life, the universe, and everything is 42"
131137
};
132-
var requestOptions = new EmbeddingsOptions(input, EmbeddingInputType.DOCUMENT);
138+
var requestOptions = new EmbeddingsOptions()
139+
{
140+
Input = input,
141+
InputType = EmbeddingInputType.DOCUMENT,
142+
Model = "text-embedding-3-small"
143+
};
133144
134145
Response<EmbeddingsResult> response = client.Embed(requestOptions);
135146
```
@@ -141,7 +152,12 @@ When you work on a query to retrieve such a document, you can use the following
141152
var input = new List<string> {
142153
"What's the ultimate meaning of life?"
143154
};
144-
var requestOptions = new EmbeddingsOptions(input, EmbeddingInputType.QUERY);
155+
var requestOptions = new EmbeddingsOptions()
156+
{
157+
Input = input,
158+
InputType = EmbeddingInputType.QUERY,
159+
Model = "text-embedding-3-small"
160+
};
145161
146162
Response<EmbeddingsResult> response = client.Embed(requestOptions);
147163
```

0 commit comments

Comments
 (0)