feat: add gpt-5 models (#4086)

apappascs · web-flow · commit fef454bd392e · 2025-08-14T21:10:16.000-04:00
- update gpt-5 tests - add verbosity parameter - update documentation and add tests Fixes #4086 Auto-cherry-pick to 1.0.x Signed-off-by: Alexandros Pappas <apappascs@gmail.com>
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java
@@ -196,6 +196,15 @@ public class OpenAiChatOptions implements ToolCallingChatOptions {
 	 */
 	private @JsonProperty("reasoning_effort") String reasoningEffort;
 
+	/**
+	 * verbosity: string or null
+	 * Optional - Defaults to medium
+	 * Constrains the verbosity of the model's response. Lower values will result in more concise responses, while higher values will result in more verbose responses.
+	 * Currently supported values are low, medium, and high.
+	 * If not specified, the model's default verbosity (medium) applies.
+	 */
+	private @JsonProperty("verbosity") String verbosity;
+
 	/**
 	 * This tool searches the web for relevant results to use in a response.
 	 */
@@ -268,6 +277,7 @@ public static OpenAiChatOptions fromOptions(OpenAiChatOptions fromOptions) {
 			.metadata(fromOptions.getMetadata())
 			.reasoningEffort(fromOptions.getReasoningEffort())
 			.webSearchOptions(fromOptions.getWebSearchOptions())
+			.verbosity(fromOptions.getVerbosity())
 			.build();
 	}
 
@@ -564,6 +574,14 @@ public void setWebSearchOptions(WebSearchOptions webSearchOptions) {
 		this.webSearchOptions = webSearchOptions;
 	}
 
+	public String getVerbosity() {
+		return this.verbosity;
+	}
+
+	public void setVerbosity(String verbosity) {
+		this.verbosity = verbosity;
+	}
+
 	@Override
 	public OpenAiChatOptions copy() {
 		return OpenAiChatOptions.fromOptions(this);
@@ -609,7 +627,8 @@ public boolean equals(Object o) {
 				&& Objects.equals(this.outputAudio, other.outputAudio) && Objects.equals(this.store, other.store)
 				&& Objects.equals(this.metadata, other.metadata)
 				&& Objects.equals(this.reasoningEffort, other.reasoningEffort)
-				&& Objects.equals(this.webSearchOptions, other.webSearchOptions);
+				&& Objects.equals(this.webSearchOptions, other.webSearchOptions)
+				&& Objects.equals(this.verbosity, other.verbosity);
 	}
 
 	@Override
@@ -802,6 +821,11 @@ public Builder webSearchOptions(WebSearchOptions webSearchOptions) {
 			return this;
 		}
 
+		public Builder verbosity(String verbosity) {
+			this.options.verbosity = verbosity;
+			return this;
+		}
+
 		public OpenAiChatOptions build() {
 			return this.options;
 		}
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
@@ -482,18 +482,37 @@ public enum ChatModel implements ChatModelDescription {
 		GPT_5("gpt-5"),
 
 		/**
-		 * <b>GPT-5 (2025-08-07)</b> is a specific snapshot of the GPT-5 model from August
-		 * 7, 2025, providing enhanced capabilities for complex reasoning and
-		 * problem-solving tasks.
+		 * GPT-5 mini is a faster, more cost-efficient version of GPT-5. It's great for
+		 * well-defined tasks and precise prompts.
 		 * <p>
-		 * Note: GPT-5 models require temperature=1.0 (default value). Custom temperature
-		 * values are not supported and will cause errors.
+		 * Model ID: gpt-5-mini
+		 * <p>
+		 * See:
+		 * <a href="https://platform.openai.com/docs/models/gpt-5-mini">gpt-5-mini</a>
+		 */
+		GPT_5_MINI("gpt-5-mini"),
+
+		/**
+		 * GPT-5 nano is the fastest, cheapest version of GPT-5. It's great for
+		 * summarization and classification tasks.
 		 * <p>
-		 * Model ID: gpt-5-2025-08-07
+		 * Model ID: gpt-5-nano
 		 * <p>
-		 * See: <a href="https://platform.openai.com/docs/models/gpt-5">gpt-5</a>
+		 * See:
+		 * <a href="https://platform.openai.com/docs/models/gpt-5-nano">gpt-5-nano</a>
+		 */
+		GPT_5_NANO("gpt-5-nano"),
+
+		/**
+		 * GPT-5 Chat points to the GPT-5 snapshot currently used in ChatGPT. GPT-5
+		 * accepts both text and image inputs, and produces text outputs.
+		 * <p>
+		 * Model ID: gpt-5-chat-latest
+		 * <p>
+		 * See: <a href=
+		 * "https://platform.openai.com/docs/models/gpt-5-chat-latest">gpt-5-chat-latest</a>
 		 */
-		GPT_5_2025_08_07("gpt-5-2025-08-07"),
+		GPT_5_CHAT_LATEST("gpt-5-chat-latest"),
 
 		/**
 		 * <b>GPT-4o</b> (“o” for “omni”) is the versatile, high-intelligence flagship
@@ -1064,6 +1083,7 @@ public enum OutputModality {
 	 * Currently supported values are low, medium, and high. Reducing reasoning effort can
 	 * result in faster responses and fewer tokens used on reasoning in a response.
 	 * @param webSearchOptions Options for web search.
+	 * @param verbosity Controls the verbosity of the model's response.
 	 */
 	@JsonInclude(Include.NON_NULL)
 	public record ChatCompletionRequest(// @formatter:off
@@ -1094,7 +1114,8 @@ public record ChatCompletionRequest(// @formatter:off
 			@JsonProperty("parallel_tool_calls") Boolean parallelToolCalls,
 			@JsonProperty("user") String user,
 			@JsonProperty("reasoning_effort") String reasoningEffort,
-			@JsonProperty("web_search_options") WebSearchOptions webSearchOptions) {
+			@JsonProperty("web_search_options") WebSearchOptions webSearchOptions,
+			@JsonProperty("verbosity") String verbosity)  {
 
 		/**
 		 * Shortcut constructor for a chat completion request with the given messages, model and temperature.
@@ -1106,7 +1127,7 @@ public record ChatCompletionRequest(// @formatter:off
 		public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model, Double temperature) {
 			this(messages, model, null, null, null, null, null, null, null, null, null, null, null, null, null,
 					null, null, null, false, null, temperature, null,
-					null, null, null, null, null, null);
+					null, null, null, null, null, null, null);
 		}
 
 		/**
@@ -1120,7 +1141,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
 			this(messages, model, null, null, null, null, null, null,
 					null, null, null, List.of(OutputModality.AUDIO, OutputModality.TEXT), audio, null, null,
 					null, null, null, stream, null, null, null,
-					null, null, null, null, null, null);
+					null, null, null, null, null, null, null);
 		}
 
 		/**
@@ -1135,7 +1156,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
 		public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model, Double temperature, boolean stream) {
 			this(messages, model, null, null, null, null, null, null, null, null, null,
 					null, null, null, null, null, null, null, stream, null, temperature, null,
-					null, null, null, null, null, null);
+					null, null, null, null, null, null, null);
 		}
 
 		/**
@@ -1151,7 +1172,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
 				List<FunctionTool> tools, Object toolChoice) {
 			this(messages, model, null, null, null, null, null, null, null, null, null,
 					null, null, null, null, null, null, null, false, null, 0.8, null,
-					tools, toolChoice, null, null, null, null);
+					tools, toolChoice, null, null, null, null, null);
 		}
 
 		/**
@@ -1164,7 +1185,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
 		public ChatCompletionRequest(List<ChatCompletionMessage> messages, Boolean stream) {
 			this(messages, null, null, null, null, null, null, null, null, null, null,
 					null, null, null, null, null, null, null, stream, null, null, null,
-					null, null, null, null, null, null);
+					null, null, null, null, null, null, null);
 		}
 
 		/**
@@ -1177,7 +1198,7 @@ public ChatCompletionRequest streamOptions(StreamOptions streamOptions) {
 			return new ChatCompletionRequest(this.messages, this.model, this.store, this.metadata, this.frequencyPenalty, this.logitBias, this.logprobs,
 			this.topLogprobs, this.maxTokens, this.maxCompletionTokens, this.n, this.outputModalities, this.audioParameters, this.presencePenalty,
 			this.responseFormat, this.seed, this.serviceTier, this.stop, this.stream, streamOptions, this.temperature, this.topP,
-			this.tools, this.toolChoice, this.parallelToolCalls, this.user, this.reasoningEffort, this.webSearchOptions);
+			this.tools, this.toolChoice, this.parallelToolCalls, this.user, this.reasoningEffort, this.webSearchOptions, this.verbosity);
 		}
 
 		/**
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java
@@ -77,7 +77,7 @@ void validateReasoningTokens() {
 				"If a train travels 100 miles in 2 hours, what is its average speed?", ChatCompletionMessage.Role.USER);
 		ChatCompletionRequest request = new ChatCompletionRequest(List.of(userMessage), "o1", null, null, null, null,
 				null, null, null, null, null, null, null, null, null, null, null, null, false, null, null, null, null,
-				null, null, null, "low", null);
+				null, null, null, "low", null, null);
 		ResponseEntity<ChatCompletion> response = this.openAiApi.chatCompletionEntity(request);
 
 		assertThat(response).isNotNull();
@@ -159,7 +159,7 @@ void streamOutputAudio() {
 	}
 
 	@ParameterizedTest(name = "{0} : {displayName}")
-	@EnumSource(names = { "GPT_5", "GPT_5_2025_08_07" })
+	@EnumSource(names = { "GPT_5", "GPT_5_CHAT_LATEST", "GPT_5_MINI", "GPT_5_NANO" })
 	void chatCompletionEntityWithNewModels(OpenAiApi.ChatModel modelName) {
 		ChatCompletionMessage chatCompletionMessage = new ChatCompletionMessage("Hello world", Role.USER);
 		ResponseEntity<ChatCompletion> response = this.openAiApi.chatCompletionEntity(
@@ -172,4 +172,50 @@ void chatCompletionEntityWithNewModels(OpenAiApi.ChatModel modelName) {
 		assertThat(response.getBody().model()).containsIgnoringCase(modelName.getValue());
 	}
 
+	@ParameterizedTest(name = "{0} : {displayName}")
+	@EnumSource(names = { "GPT_5_NANO" })
+	void chatCompletionEntityWithNewModelsAndLowVerbosity(OpenAiApi.ChatModel modelName) {
+		ChatCompletionMessage chatCompletionMessage = new ChatCompletionMessage(
+				"What is the answer to the ultimate question of life, the universe, and everything?", Role.USER);
+
+		ChatCompletionRequest request = new ChatCompletionRequest(List.of(chatCompletionMessage), // messages
+				modelName.getValue(), null, null, null, null, null, null, null, null, null, null, null, null, null,
+				null, null, null, false, null, 1.0, null, null, null, null, null, null, null, "low");
+
+		ResponseEntity<ChatCompletion> response = this.openAiApi.chatCompletionEntity(request);
+
+		assertThat(response).isNotNull();
+		assertThat(response.getBody()).isNotNull();
+		assertThat(response.getBody().choices()).isNotEmpty();
+		assertThat(response.getBody().choices().get(0).message().content()).isNotEmpty();
+		assertThat(response.getBody().model()).containsIgnoringCase(modelName.getValue());
+	}
+
+	@ParameterizedTest(name = "{0} : {displayName}")
+	@EnumSource(names = { "GPT_5", "GPT_5_MINI", "GPT_5_NANO" })
+	void chatCompletionEntityWithGpt5ModelsAndTemperatureShouldFail(OpenAiApi.ChatModel modelName) {
+		ChatCompletionMessage chatCompletionMessage = new ChatCompletionMessage("Hello world", Role.USER);
+		ChatCompletionRequest request = new ChatCompletionRequest(List.of(chatCompletionMessage), modelName.getValue(),
+				0.8);
+
+		assertThatThrownBy(() -> this.openAiApi.chatCompletionEntity(request)).isInstanceOf(RuntimeException.class)
+			.hasMessageContaining("Unsupported value");
+	}
+
+	@ParameterizedTest(name = "{0} : {displayName}")
+	@EnumSource(names = { "GPT_5_CHAT_LATEST" })
+	void chatCompletionEntityWithGpt5ChatAndTemperatureShouldSucceed(OpenAiApi.ChatModel modelName) {
+		ChatCompletionMessage chatCompletionMessage = new ChatCompletionMessage("Hello world", Role.USER);
+		ChatCompletionRequest request = new ChatCompletionRequest(List.of(chatCompletionMessage), modelName.getValue(),
+				0.8);
+
+		ResponseEntity<ChatCompletion> response = this.openAiApi.chatCompletionEntity(request);
+
+		assertThat(response).isNotNull();
+		assertThat(response.getBody()).isNotNull();
+		assertThat(response.getBody().choices()).isNotEmpty();
+		assertThat(response.getBody().choices().get(0).message().content()).isNotEmpty();
+		assertThat(response.getBody().model()).containsIgnoringCase(modelName.getValue());
+	}
+
 }
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc
@@ -181,7 +181,10 @@ The `JSON_SCHEMA` type enables link:https://platform.openai.com/docs/guides/stru
 
 [NOTE]
 ====
-When using GPT-5 models (`gpt-5`, `gpt-5-2025-08-07`), the temperature parameter must be set to `1.0` (the default value). These models do not support custom temperature values and will return an error if any other temperature value is specified.
+When using GPT-5 models such as `gpt-5`, `gpt-5-mini`, and `gpt-5-nano`, custom `temperature` values are not supported.
+These models are optimized for reasoning and only accept the default temperature of `1.0`.
+Specifying any other temperature value will result in an error.
+In contrast, conversational models like `gpt-5-chat-latest` do support the `temperature` parameter.
 ====
 
 NOTE: You can override the common `spring.ai.openai.base-url` and `spring.ai.openai.api-key` for the `ChatModel` and `EmbeddingModel` implementations.