diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java
index e9af1435b6a..c8d61ae62f6 100644
--- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java
@@ -233,6 +233,11 @@ public class OpenAiChatOptions implements ToolCallingChatOptions {
*/
private @JsonProperty("web_search_options") WebSearchOptions webSearchOptions;
+ /**
+ * Specifies the processing type used for serving the request.
+ */
+ private @JsonProperty("service_tier") String serviceTier;
+
/**
* Collection of {@link ToolCallback}s to be used for tool calling in the chat completion requests.
*/
@@ -301,6 +306,7 @@ public static OpenAiChatOptions fromOptions(OpenAiChatOptions fromOptions) {
.reasoningEffort(fromOptions.getReasoningEffort())
.webSearchOptions(fromOptions.getWebSearchOptions())
.verbosity(fromOptions.getVerbosity())
+ .serviceTier(fromOptions.getServiceTier())
.build();
}
@@ -605,6 +611,14 @@ public void setVerbosity(String verbosity) {
this.verbosity = verbosity;
}
+ public String getServiceTier() {
+ return serviceTier;
+ }
+
+ public void setServiceTier(String serviceTier) {
+ this.serviceTier = serviceTier;
+ }
+
@Override
public OpenAiChatOptions copy() {
return OpenAiChatOptions.fromOptions(this);
@@ -617,7 +631,7 @@ public int hashCode() {
this.streamOptions, this.seed, this.stop, this.temperature, this.topP, this.tools, this.toolChoice,
this.user, this.parallelToolCalls, this.toolCallbacks, this.toolNames, this.httpHeaders,
this.internalToolExecutionEnabled, this.toolContext, this.outputModalities, this.outputAudio,
- this.store, this.metadata, this.reasoningEffort, this.webSearchOptions);
+ this.store, this.metadata, this.reasoningEffort, this.webSearchOptions, this.serviceTier);
}
@Override
@@ -651,7 +665,8 @@ public boolean equals(Object o) {
&& Objects.equals(this.metadata, other.metadata)
&& Objects.equals(this.reasoningEffort, other.reasoningEffort)
&& Objects.equals(this.webSearchOptions, other.webSearchOptions)
- && Objects.equals(this.verbosity, other.verbosity);
+ && Objects.equals(this.verbosity, other.verbosity)
+ && Objects.equals(this.serviceTier, other.serviceTier);
}
@Override
@@ -909,6 +924,16 @@ public Builder verbosity(String verbosity) {
return this;
}
+ public Builder serviceTier(String serviceTier) {
+ this.options.serviceTier = serviceTier;
+ return this;
+ }
+
+ public Builder serviceTier(OpenAiApi.ServiceTier serviceTier) {
+ this.options.serviceTier = serviceTier.getValue();
+ return this;
+ }
+
public OpenAiChatOptions build() {
return this.options;
}
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
index 1d02b206cc8..98541792d32 100644
--- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
+++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
@@ -1345,6 +1345,41 @@ public record Approximate(@JsonProperty("city") String city, @JsonProperty("coun
} // @formatter:on
+ /**
+ * Specifies the processing type used for serving the request.
+ */
+ public enum ServiceTier {
+
+ /**
+ * The request will be processed with the service tier configured in the
+ * Project settings.
+ */
+ AUTO("auto"),
+ /**
+ * The request will be processed with standard pricing.
+ */
+ DEFAULT("default"),
+ /**
+ * The request will be processed with flex pricing.
+ */
+ FLEX("flex"),
+ /**
+ * The request will be processed with priority pricing.
+ */
+ PRIORITY("priority");
+
+ private final String value;
+
+ private ServiceTier(String value) {
+ this.value = value;
+ }
+
+ public String getValue() {
+ return value;
+ }
+
+ }
+
/**
* Message comprising the conversation.
*
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/OpenAiChatOptionsTests.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/OpenAiChatOptionsTests.java
index 3072e240305..7a6a84bae37 100644
--- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/OpenAiChatOptionsTests.java
+++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/OpenAiChatOptionsTests.java
@@ -26,6 +26,7 @@
import org.springframework.ai.openai.api.OpenAiApi;
import org.springframework.ai.openai.api.OpenAiApi.ChatCompletionRequest.AudioParameters;
import org.springframework.ai.openai.api.OpenAiApi.ChatCompletionRequest.StreamOptions;
+import org.springframework.ai.openai.api.OpenAiApi.ServiceTier;
import org.springframework.ai.openai.api.ResponseFormat;
import static org.assertj.core.api.Assertions.assertThat;
@@ -83,6 +84,7 @@ void testBuilderWithAllFields() {
.internalToolExecutionEnabled(false)
.httpHeaders(Map.of("header1", "value1"))
.toolContext(toolContext)
+ .serviceTier(ServiceTier.PRIORITY)
.build();
assertThat(options)
@@ -90,10 +92,11 @@ void testBuilderWithAllFields() {
"maxCompletionTokens", "n", "outputModalities", "outputAudio", "presencePenalty", "responseFormat",
"streamOptions", "seed", "stop", "temperature", "topP", "tools", "toolChoice", "user",
"parallelToolCalls", "store", "metadata", "reasoningEffort", "internalToolExecutionEnabled",
- "httpHeaders", "toolContext")
+ "httpHeaders", "toolContext", "serviceTier")
.containsExactly("test-model", 0.5, logitBias, true, 5, null, 50, 2, outputModalities, outputAudio, 0.8,
responseFormat, streamOptions, 12345, stopSequences, 0.7, 0.9, tools, toolChoice, "test-user", true,
- false, metadata, "medium", false, Map.of("header1", "value1"), toolContext);
+ false, metadata, "medium", false, Map.of("header1", "value1"), toolContext,
+ ServiceTier.PRIORITY.getValue());
assertThat(options.getStreamUsage()).isTrue();
assertThat(options.getStreamOptions()).isEqualTo(StreamOptions.INCLUDE_USAGE);
@@ -141,6 +144,7 @@ void testCopy() {
.reasoningEffort("low")
.internalToolExecutionEnabled(true)
.httpHeaders(Map.of("header1", "value1"))
+ .serviceTier(ServiceTier.DEFAULT)
.build();
OpenAiChatOptions copiedOptions = originalOptions.copy();
@@ -189,6 +193,7 @@ void testSetters() {
options.setReasoningEffort("high");
options.setInternalToolExecutionEnabled(false);
options.setHttpHeaders(Map.of("header2", "value2"));
+ options.setServiceTier(ServiceTier.DEFAULT.getValue());
assertThat(options.getModel()).isEqualTo("test-model");
assertThat(options.getFrequencyPenalty()).isEqualTo(0.5);
@@ -223,6 +228,7 @@ void testSetters() {
options.setStopSequences(List.of("s1", "s2"));
assertThat(options.getStopSequences()).isEqualTo(List.of("s1", "s2"));
assertThat(options.getStop()).isEqualTo(List.of("s1", "s2"));
+ assertThat(options.getServiceTier()).isEqualTo("default");
}
@Test
@@ -258,6 +264,7 @@ void testDefaultValues() {
assertThat(options.getToolContext()).isEqualTo(new HashMap<>());
assertThat(options.getStreamUsage()).isFalse();
assertThat(options.getStopSequences()).isNull();
+ assertThat(options.getServiceTier()).isNull();
}
@Test
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java
index e9fda577c8f..d050a621034 100644
--- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java
+++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java
@@ -218,4 +218,22 @@ void chatCompletionEntityWithGpt5ChatAndTemperatureShouldSucceed(OpenAiApi.ChatM
assertThat(response.getBody().model()).containsIgnoringCase(modelName.getValue());
}
+ @ParameterizedTest(name = "{0} : {displayName}")
+ @EnumSource(names = { "DEFAULT", "PRIORITY" })
+ void chatCompletionEntityWithServiceTier(OpenAiApi.ServiceTier serviceTier) {
+ ChatCompletionMessage chatCompletionMessage = new ChatCompletionMessage(
+ "What is the answer to the ultimate question of life, the universe, and everything?", Role.USER);
+
+ ChatCompletionRequest request = new ChatCompletionRequest(List.of(chatCompletionMessage), // messages
+ OpenAiApi.ChatModel.GPT_4_O.value, null, null, null, null, null, null, null, null, null, null, null,
+ null, null, null, serviceTier.getValue(), null, false, null, 1.0, null, null, null, null, null, null,
+ null, null);
+
+ ResponseEntity response = this.openAiApi.chatCompletionEntity(request);
+
+ assertThat(response).isNotNull();
+ assertThat(response.getBody()).isNotNull();
+ assertThat(response.getBody().serviceTier()).containsIgnoringCase(serviceTier.getValue());
+ }
+
}
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiStreamFunctionCallingHelperTest.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiStreamFunctionCallingHelperTest.java
index 79ed1426890..14c3b5ad1b3 100644
--- a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiStreamFunctionCallingHelperTest.java
+++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiStreamFunctionCallingHelperTest.java
@@ -23,6 +23,7 @@
import static org.assertj.core.api.Assertions.assertThat;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
+import org.springframework.ai.openai.api.OpenAiApi.ServiceTier;
/**
* Unit tests for {@link OpenAiStreamFunctionCallingHelper}
@@ -36,7 +37,7 @@ public class OpenAiStreamFunctionCallingHelperTest {
@Test
public void merge_whenInputIsValid() {
var expectedResult = new OpenAiApi.ChatCompletionChunk("id", Collections.emptyList(),
- System.currentTimeMillis(), "model", "serviceTier", "fingerPrint", "object", null);
+ System.currentTimeMillis(), "model", "default", "fingerPrint", "object", null);
var previous = new OpenAiApi.ChatCompletionChunk(null, null, expectedResult.created(), expectedResult.model(),
expectedResult.serviceTier(), null, null, null);
var current = new OpenAiApi.ChatCompletionChunk(expectedResult.id(), null, null, null, null,
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc
index 41b6d9fa2e9..872aff1fea2 100644
--- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc
+++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc
@@ -177,6 +177,7 @@ The `JSON_SCHEMA` type enables link:https://platform.openai.com/docs/guides/stru
| spring.ai.openai.chat.options.parallel-tool-calls | Whether to enable link:https://platform.openai.com/docs/guides/function-calling/parallel-function-calling[parallel function calling] during tool use. | true
| spring.ai.openai.chat.options.http-headers | Optional HTTP headers to be added to the chat completion request. To override the `api-key` you need to use an `Authorization` header key, and you have to prefix the key value with the `Bearer` prefix. | -
| spring.ai.openai.chat.options.proxy-tool-calls | If true, the Spring AI will not handle the function calls internally, but will proxy them to the client. Then is the client's responsibility to handle the function calls, dispatch them to the appropriate function, and return the results. If false (the default), the Spring AI will handle the function calls internally. Applicable only for chat models with function calling support | false
+| spring.ai.openai.chat.options.service-tier | Specifies the link:https://platform.openai.com/docs/api-reference/chat/create#chat-create-service_tier[processing type] used for serving the request. | -
|====
[NOTE]