Skip to content

Commit 88432ec

Browse files
committed
GH-3300 | Add max_completion_tokens to Azure OpenAI configuration options
Signed-off-by: Oskar Drozda <[email protected]>
1 parent 8caffe8 commit 88432ec

File tree

3 files changed

+47
-13
lines changed

3 files changed

+47
-13
lines changed

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatModel.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,11 @@ private ChatCompletionsOptions merge(ChatCompletionsOptions fromAzureOptions,
720720
mergedAzureOptions.setMaxTokens((fromAzureOptions.getMaxTokens() != null) ? fromAzureOptions.getMaxTokens()
721721
: toSpringAiOptions.getMaxTokens());
722722

723+
if (fromAzureOptions.getMaxCompletionTokens() != null || toSpringAiOptions.getMaxCompletionTokens() != null) {
724+
mergedAzureOptions.setMaxCompletionTokens((fromAzureOptions.getMaxCompletionTokens() != null)
725+
? fromAzureOptions.getMaxCompletionTokens() : toSpringAiOptions.getMaxCompletionTokens());
726+
}
727+
723728
mergedAzureOptions.setLogitBias(fromAzureOptions.getLogitBias() != null ? fromAzureOptions.getLogitBias()
724729
: toSpringAiOptions.getLogitBias());
725730

@@ -803,6 +808,10 @@ private ChatCompletionsOptions merge(AzureOpenAiChatOptions fromSpringAiOptions,
803808
mergedAzureOptions.setMaxTokens(fromSpringAiOptions.getMaxTokens());
804809
}
805810

811+
if (fromSpringAiOptions.getMaxCompletionTokens() != null) {
812+
mergedAzureOptions.setMaxCompletionTokens(fromSpringAiOptions.getMaxCompletionTokens());
813+
}
814+
806815
if (fromSpringAiOptions.getLogitBias() != null) {
807816
mergedAzureOptions.setLogitBias(fromSpringAiOptions.getLogitBias());
808817
}
@@ -894,6 +903,9 @@ private ChatCompletionsOptions copy(ChatCompletionsOptions fromOptions) {
894903
if (fromOptions.getMaxTokens() != null) {
895904
copyOptions.setMaxTokens(fromOptions.getMaxTokens());
896905
}
906+
if (fromOptions.getMaxCompletionTokens() != null) {
907+
copyOptions.setMaxCompletionTokens(fromOptions.getMaxCompletionTokens());
908+
}
897909
if (fromOptions.getLogitBias() != null) {
898910
copyOptions.setLogitBias(fromOptions.getLogitBias());
899911
}

models/spring-ai-azure-openai/src/main/java/org/springframework/ai/azure/openai/AzureOpenAiChatOptions.java

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2023-2024 the original author or authors.
2+
* Copyright 2023-2025 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -167,6 +167,13 @@ public class AzureOpenAiChatOptions implements ToolCallingChatOptions {
167167
@JsonProperty("top_log_probs")
168168
private Integer topLogProbs;
169169

170+
/*
171+
* An upper bound for the number of tokens that can be generated for a completion,
172+
* including visible output tokens and reasoning tokens.
173+
*/
174+
@JsonProperty("max_completion_tokens")
175+
private Integer maxCompletionTokens;
176+
170177
/*
171178
* If provided, the configuration options for available Azure OpenAI chat
172179
* enhancements.
@@ -266,6 +273,7 @@ public static AzureOpenAiChatOptions fromOptions(AzureOpenAiChatOptions fromOpti
266273
.frequencyPenalty(fromOptions.getFrequencyPenalty() != null ? fromOptions.getFrequencyPenalty() : null)
267274
.logitBias(fromOptions.getLogitBias())
268275
.maxTokens(fromOptions.getMaxTokens())
276+
.maxCompletionTokens(fromOptions.getMaxCompletionTokens())
269277
.N(fromOptions.getN())
270278
.presencePenalty(fromOptions.getPresencePenalty() != null ? fromOptions.getPresencePenalty() : null)
271279
.stop(fromOptions.getStop() != null ? new ArrayList<>(fromOptions.getStop()) : null)
@@ -300,6 +308,14 @@ public void setMaxTokens(Integer maxTokens) {
300308
this.maxTokens = maxTokens;
301309
}
302310

311+
public Integer getMaxCompletionTokens() {
312+
return this.maxCompletionTokens;
313+
}
314+
315+
public void setMaxCompletionTokens(Integer maxCompletionTokens) {
316+
this.maxCompletionTokens = maxCompletionTokens;
317+
}
318+
303319
public Map<String, Integer> getLogitBias() {
304320
return this.logitBias;
305321
}
@@ -510,6 +526,7 @@ public boolean equals(Object o) {
510526
&& Objects.equals(this.enableStreamUsage, that.enableStreamUsage)
511527
&& Objects.equals(this.reasoningEffort, that.reasoningEffort)
512528
&& Objects.equals(this.toolContext, that.toolContext) && Objects.equals(this.maxTokens, that.maxTokens)
529+
&& Objects.equals(this.maxCompletionTokens, that.maxCompletionTokens)
513530
&& Objects.equals(this.frequencyPenalty, that.frequencyPenalty)
514531
&& Objects.equals(this.presencePenalty, that.presencePenalty)
515532
&& Objects.equals(this.temperature, that.temperature) && Objects.equals(this.topP, that.topP);
@@ -520,8 +537,8 @@ public int hashCode() {
520537
return Objects.hash(this.logitBias, this.user, this.n, this.stop, this.deploymentName, this.responseFormat,
521538
this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, this.seed, this.logprobs,
522539
this.topLogProbs, this.enhancements, this.streamOptions, this.reasoningEffort, this.enableStreamUsage,
523-
this.toolContext, this.maxTokens, this.frequencyPenalty, this.presencePenalty, this.temperature,
524-
this.topP);
540+
this.toolContext, this.maxTokens, this.maxCompletionTokens, this.frequencyPenalty, this.presencePenalty,
541+
this.temperature, this.topP);
525542
}
526543

527544
public static class Builder {
@@ -556,6 +573,11 @@ public Builder maxTokens(Integer maxTokens) {
556573
return this;
557574
}
558575

576+
public Builder maxCompletionTokens(Integer maxCompletionTokens) {
577+
this.options.maxCompletionTokens = maxCompletionTokens;
578+
return this;
579+
}
580+
559581
public Builder N(Integer n) {
560582
this.options.n = n;
561583
return this;

spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/azure-openai-chat.adoc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,12 @@ This is because in OpenAI there is no `Deployment Name`, only a `Model Name`.
113113

114114
NOTE: The property `spring.ai.azure.openai.chat.options.model` has been renamed to `spring.ai.azure.openai.chat.options.deployment-name`.
115115

116-
NOTE: If you decide to connect to `OpenAI` instead of `Azure OpenAI`, by setting the `spring.ai.azure.openai.openai-api-key=<Your OpenAI Key>` property,
116+
NOTE: If you decide to connect to `OpenAI` instead of `Azure OpenAI`, by setting the `spring.ai.azure.openai.openai-api-key=<Your OpenAI Key>` property,
117117
then the `spring.ai.azure.openai.chat.options.deployment-name` is treated as an link:https://platform.openai.com/docs/models[OpenAI model] name.
118118

119119
==== Access the OpenAI Model
120120

121-
You can configure the client to use directly `OpenAI` instead of the `Azure OpenAI` deployed models.
121+
You can configure the client to use directly `OpenAI` instead of the `Azure OpenAI` deployed models.
122122
For this you need to set the `spring.ai.azure.openai.openai-api-key=<Your OpenAI Key>` instead of `spring.ai.azure.openai.api-key=<Your Azure OpenAi Key>`.
123123

124124
=== Add Repositories and BOM
@@ -197,8 +197,8 @@ The prefix `spring.ai.azure.openai` is the property prefix to configure the conn
197197

198198
| spring.ai.azure.openai.api-key | The Key from Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
199199
| spring.ai.azure.openai.endpoint | The endpoint from the Azure AI OpenAI `Keys and Endpoint` section under `Resource Management` | -
200-
| spring.ai.azure.openai.openai-api-key | (non Azure) OpenAI API key. Used to authenticate with the OpenAI service, instead of Azure OpenAI.
201-
This automatically sets the endpoint to https://api.openai.com/v1. Use either `api-key` or `openai-api-key` property.
200+
| spring.ai.azure.openai.openai-api-key | (non Azure) OpenAI API key. Used to authenticate with the OpenAI service, instead of Azure OpenAI.
201+
This automatically sets the endpoint to https://api.openai.com/v1. Use either `api-key` or `openai-api-key` property.
202202
With this configuration the `spring.ai.azure.openai.chat.options.deployment-name` is treated as an https://platform.openai.com/docs/models[OpenAI model] name.| -
203203
| spring.ai.azure.openai.custom-headers | A map of custom headers to be included in the API requests. Each entry in the map represents a header, where the key is the header name and the value is the header value. | Empty map
204204
|====
@@ -223,11 +223,12 @@ The prefix `spring.ai.azure.openai.chat` is the property prefix that configures
223223
| spring.ai.azure.openai.chat.enabled (Removed and no longer valid) | Enable Azure OpenAI chat model. | true
224224
| spring.ai.model.chat | Enable Azure OpenAI chat model. | azure-openai
225225
| spring.ai.azure.openai.chat.options.deployment-name | In use with Azure, this refers to the "Deployment Name" of your model, which you can find at https://oai.azure.com/portal.
226-
It's important to note that within an Azure OpenAI deployment, the "Deployment Name" is distinct from the model itself.
227-
The confusion around these terms stems from the intention to make the Azure OpenAI client library compatible with the original OpenAI endpoint.
226+
It's important to note that within an Azure OpenAI deployment, the "Deployment Name" is distinct from the model itself.
227+
The confusion around these terms stems from the intention to make the Azure OpenAI client library compatible with the original OpenAI endpoint.
228228
The deployment structures offered by Azure OpenAI and Sam Altman's OpenAI differ significantly.
229229
Deployments model name to provide as part of this completions request. | gpt-4o
230-
| spring.ai.azure.openai.chat.options.maxTokens | The maximum number of tokens to generate. | -
230+
| spring.ai.azure.openai.chat.options.maxTokens | The maximum number of tokens to generate in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length. | -
231+
| spring.ai.azure.openai.chat.options.maxCompletionTokens | An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. | -
231232
| spring.ai.azure.openai.chat.options.temperature | The sampling temperature to use that controls the apparent creativity of generated completions. Higher values will make output more random while lower values will make results more focused and deterministic. It is not recommended to modify temperature and top_p for the same completions request as the interaction of these two settings is difficult to predict. | 0.7
232233
| spring.ai.azure.openai.chat.options.topP | An alternative to sampling with temperature called nucleus sampling. This value causes the model to consider the results of tokens with the provided probability mass. | -
233234
| spring.ai.azure.openai.chat.options.logitBias | A map between GPT token IDs and bias scores that influences the probability of specific tokens appearing in a completions response. Token IDs are computed via external tokenizer tools, while bias scores reside in the range of -100 to 100 with minimum and maximum values corresponding to a full ban or exclusive selection of a token, respectively. The exact behavior of a given bias score varies by model. | -
@@ -409,9 +410,9 @@ var openAIClientBuilder = new OpenAIClientBuilder()
409410
.endpoint(System.getenv("AZURE_OPENAI_ENDPOINT"));
410411
411412
var openAIChatOptions = AzureOpenAiChatOptions.builder()
412-
.deploymentName("gpt-4o")
413+
.deploymentName("gpt-5")
413414
.temperature(0.4)
414-
.maxTokens(200)
415+
.maxCompletionTokens(200)
415416
.build();
416417
417418
var chatModel = AzureOpenAiChatModel.builder()
@@ -429,4 +430,3 @@ Flux<ChatResponse> streamingResponses = chatModel.stream(
429430
----
430431

431432
NOTE: the `gpt-5` is actually the `Deployment Name` as presented in the Azure AI Portal.
432-

0 commit comments

Comments
 (0)