Commit 101ff67

Introduce reasoningEffort and serviceTier config support for OpenAI
Closes: #1870
1 parent d6a8288 commit 101ff67

4 files changed: +225 -0 lines changed

docs/modules/ROOT/pages/includes/quarkus-langchain4j-openai.adoc

Lines changed: 96 additions & 0 deletions
@@ -574,6 +574,54 @@ endif::add-copy-button-to-env-var[]
 |list of string
 |
 
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-reasoning-effort]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-reasoning-effort[`quarkus.langchain4j.openai.chat-model.reasoning-effort`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai.chat-model.reasoning-effort+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Constrains effort on reasoning for reasoning models. Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
+
+Note: The `gpt-5-pro` model defaults to (and only supports) high reasoning effort.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_REASONING_EFFORT+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_REASONING_EFFORT+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|
+
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-service-tier]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-service-tier[`quarkus.langchain4j.openai.chat-model.service-tier`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai.chat-model.service-tier+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Specifies the processing type used for serving the request.
+
+If set to `auto`, then the request will be processed with the service tier configured in the Project settings. If set to `default`, then the request will be processed with the standard pricing and performance for the selected model. If set to `flex` or `priority`, then the request will be processed with the corresponding service tier. When not set, the default behavior is `auto`.
+
+When the service tier parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_SERVICE_TIER+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_SERVICE_TIER+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|`default`
+
 a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-embedding-model-model-name]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-embedding-model-model-name[`quarkus.langchain4j.openai.embedding-model.model-name`]##
 ifdef::add-copy-button-to-config-props[]
 config_property_copy_button:+++quarkus.langchain4j.openai.embedding-model.model-name+++[]
@@ -1457,6 +1505,54 @@ endif::add-copy-button-to-env-var[]
 |list of string
 |
 
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-reasoning-effort]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-reasoning-effort[`quarkus.langchain4j.openai."model-name".chat-model.reasoning-effort`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai."model-name".chat-model.reasoning-effort+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Constrains effort on reasoning for reasoning models. Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
+
+Note: The `gpt-5-pro` model defaults to (and only supports) high reasoning effort.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_REASONING_EFFORT+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_REASONING_EFFORT+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|
+
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-service-tier]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-service-tier[`quarkus.langchain4j.openai."model-name".chat-model.service-tier`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai."model-name".chat-model.service-tier+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Specifies the processing type used for serving the request.
+
+If set to `auto`, then the request will be processed with the service tier configured in the Project settings. If set to `default`, then the request will be processed with the standard pricing and performance for the selected model. If set to `flex` or `priority`, then the request will be processed with the corresponding service tier. When not set, the default behavior is `auto`.
+
+When the service tier parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_SERVICE_TIER+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_SERVICE_TIER+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|`default`
+
 a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-embedding-model-model-name]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-embedding-model-model-name[`quarkus.langchain4j.openai."model-name".embedding-model.model-name`]##
 ifdef::add-copy-button-to-config-props[]
 config_property_copy_button:+++quarkus.langchain4j.openai."model-name".embedding-model.model-name+++[]
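The two documented properties can be set together in a Quarkus application's `application.properties`. The property names come from the tables above; the concrete values chosen here (`low`, `flex`) are illustrative only:

```properties
# Illustrative values, not defaults: reasoning effort accepts
# minimal/low/medium/high (see the property description above).
quarkus.langchain4j.openai.chat-model.reasoning-effort=low

# One of auto/default/flex/priority (see the property description above).
quarkus.langchain4j.openai.chat-model.service-tier=flex
```

Equivalently, the environment variables `QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_REASONING_EFFORT` and `QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_SERVICE_TIER` listed above can be used.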

docs/modules/ROOT/pages/includes/quarkus-langchain4j-openai_quarkus.langchain4j.adoc

Lines changed: 96 additions & 0 deletions
@@ -574,6 +574,54 @@ endif::add-copy-button-to-env-var[]
 |list of string
 |
 
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-reasoning-effort]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-reasoning-effort[`quarkus.langchain4j.openai.chat-model.reasoning-effort`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai.chat-model.reasoning-effort+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Constrains effort on reasoning for reasoning models. Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
+
+Note: The `gpt-5-pro` model defaults to (and only supports) high reasoning effort.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_REASONING_EFFORT+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_REASONING_EFFORT+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|
+
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-service-tier]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-chat-model-service-tier[`quarkus.langchain4j.openai.chat-model.service-tier`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai.chat-model.service-tier+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Specifies the processing type used for serving the request.
+
+If set to `auto`, then the request will be processed with the service tier configured in the Project settings. If set to `default`, then the request will be processed with the standard pricing and performance for the selected model. If set to `flex` or `priority`, then the request will be processed with the corresponding service tier. When not set, the default behavior is `auto`.
+
+When the service tier parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_SERVICE_TIER+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI_CHAT_MODEL_SERVICE_TIER+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|`default`
+
 a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-embedding-model-model-name]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-embedding-model-model-name[`quarkus.langchain4j.openai.embedding-model.model-name`]##
 ifdef::add-copy-button-to-config-props[]
 config_property_copy_button:+++quarkus.langchain4j.openai.embedding-model.model-name+++[]
@@ -1457,6 +1505,54 @@ endif::add-copy-button-to-env-var[]
 |list of string
 |
 
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-reasoning-effort]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-reasoning-effort[`quarkus.langchain4j.openai."model-name".chat-model.reasoning-effort`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai."model-name".chat-model.reasoning-effort+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Constrains effort on reasoning for reasoning models. Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
+
+Note: The `gpt-5-pro` model defaults to (and only supports) high reasoning effort.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_REASONING_EFFORT+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_REASONING_EFFORT+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|
+
+a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-service-tier]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-chat-model-service-tier[`quarkus.langchain4j.openai."model-name".chat-model.service-tier`]##
+ifdef::add-copy-button-to-config-props[]
+config_property_copy_button:+++quarkus.langchain4j.openai."model-name".chat-model.service-tier+++[]
+endif::add-copy-button-to-config-props[]
+
+
+[.description]
+--
+Specifies the processing type used for serving the request.
+
+If set to `auto`, then the request will be processed with the service tier configured in the Project settings. If set to `default`, then the request will be processed with the standard pricing and performance for the selected model. If set to `flex` or `priority`, then the request will be processed with the corresponding service tier. When not set, the default behavior is `auto`.
+
+When the service tier parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter.
+
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_SERVICE_TIER+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENAI__MODEL_NAME__CHAT_MODEL_SERVICE_TIER+++`
+endif::add-copy-button-to-env-var[]
+--
+|string
+|`default`
+
 a| [[quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-embedding-model-model-name]] [.property-path]##link:#quarkus-langchain4j-openai_quarkus-langchain4j-openai-model-name-embedding-model-model-name[`quarkus.langchain4j.openai."model-name".embedding-model.model-name`]##
 ifdef::add-copy-button-to-config-props[]
 config_property_copy_button:+++quarkus.langchain4j.openai."model-name".embedding-model.model-name+++[]

model-providers/openai/openai-vanilla/runtime/src/main/java/io/quarkiverse/langchain4j/openai/runtime/OpenAiRecorder.java

Lines changed: 8 additions & 0 deletions
@@ -28,6 +28,7 @@
 import dev.langchain4j.model.moderation.DisabledModerationModel;
 import dev.langchain4j.model.moderation.ModerationModel;
 import dev.langchain4j.model.openai.OpenAiChatModel;
+import dev.langchain4j.model.openai.OpenAiChatRequestParameters;
 import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
 import dev.langchain4j.model.openai.OpenAiModerationModel;
 import dev.langchain4j.model.openai.OpenAiStreamingChatModel;
@@ -77,6 +78,11 @@ public Function<SyntheticCreationalContext<ChatModel>, ChatModel> chatModel(Stri
         ChatModelConfig chatModelConfig = openAiConfig.chatModel();
         var builder = (QuarkusOpenAiChatModelBuilderFactory.Builder) OpenAiChatModel.builder();
 
+        OpenAiChatRequestParameters.Builder defaultChatRequestParametersBuilder = OpenAiChatRequestParameters.builder();
+        if (chatModelConfig.reasoningEffort().isPresent()) {
+            defaultChatRequestParametersBuilder.reasoningEffort(chatModelConfig.reasoningEffort().get());
+        }
+
         builder
                 .tlsConfigurationName(openAiConfig.tlsConfigurationName().orElse(null))
                 .configName(configName)
@@ -92,7 +98,9 @@ public Function<SyntheticCreationalContext<ChatModel>, ChatModel> chatModel(Stri
                 .presencePenalty(chatModelConfig.presencePenalty())
                 .frequencyPenalty(chatModelConfig.frequencyPenalty())
                 .responseFormat(chatModelConfig.responseFormat().orElse(null))
+                .defaultRequestParameters(defaultChatRequestParametersBuilder.build())
                 .strictJsonSchema(chatModelConfig.strictJsonSchema().orElse(null))
+                .serviceTier(chatModelConfig.serviceTier().orElse(null))
                 .stop(chatModelConfig.stop().orElse(null));
 
         openAiConfig.organizationId().ifPresent(builder::organizationId);

model-providers/openai/openai-vanilla/runtime/src/main/java/io/quarkiverse/langchain4j/openai/runtime/config/ChatModelConfig.java

Lines changed: 25 additions & 0 deletions
@@ -96,4 +96,29 @@ public interface ChatModelConfig {
      * @return
      */
     Optional<List<String>> stop();
+
+    /**
+     * Constrains effort on reasoning for reasoning models.
+     * Currently supported values are {@code minimal}, {@code low}, {@code medium}, and {@code high}.
+     * Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.
+     * <p>
+     * Note: The {@code gpt-5-pro} model defaults to (and only supports) high reasoning effort.
+     */
+    Optional<String> reasoningEffort();
+
+    /**
+     * Specifies the processing type used for serving the request.
+     * <p>
+     * If set to {@code auto}, then the request will be processed with the service tier configured in the Project settings.
+     * If set to {@code default}, then the request will be processed with the standard pricing and performance for the selected
+     * model.
+     * If set to {@code flex} or {@code priority}, then the request will be processed with the corresponding service tier.
+     * When not set, the default behavior is {@code auto}.
+     * <p>
+     * When the service tier parameter is set, the response body will include the {@code service_tier} value based on the
+     * processing mode actually used to serve the request.
+     * This response value may be different from the value set in the parameter.
+     */
+    @ConfigDocDefault("default")
+    Optional<String> serviceTier();
 }
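The two new members of `ChatModelConfig` surface as configuration keys; for a named model configuration the keys are scoped by the quoted configuration name, as documented earlier. Here `"my-model"` is a hypothetical configuration name and the values are illustrative:

```properties
# Hypothetical named configuration "my-model"; values are illustrative.
quarkus.langchain4j.openai."my-model".chat-model.reasoning-effort=medium
quarkus.langchain4j.openai."my-model".chat-model.service-tier=priority
```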
