feat: add parallel_tool_calls

hexiaochun · hexiaochun · commit 22a5c00d017d · 2025-03-03T22:42:52.000+08:00
diff --git a/volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionChunk.java b/volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionChunk.java
@@ -1,5 +1,6 @@
 package com.volcengine.ark.runtime.model.completion.chat;
 
+import com.fasterxml.jackson.annotation.JsonAlias;
 import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 import com.volcengine.ark.runtime.model.Usage;
 
@@ -27,6 +28,26 @@ public class ChatCompletionChunk {
      */
     String model;
 
+    /**
+     * Specifies the latency tier to use for processing the request.
+     *
+     *     This parameter is relevant for customers subscribed to the scale tier service:
+     *
+     *     - If set to 'auto', and the endpoint is Scale tier enabled, the system will
+     *       utilize scale tier credits until they are exhausted.
+     *     - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
+     *       be processed using the default service tier with a lower uptime SLA and no
+     *       latency guarantee.
+     *     - If set to 'default', the request will be processed using the default service
+     *       tier with a lower uptime SLA and no latency guarantee.
+     *     - When not set, the default behavior is 'auto'.
+     *
+     *     When this parameter is set, the response body will include the `service_tier`
+     *     utilized.
+     */
+    @JsonAlias("service_tier")
+    String serviceTier;
+
     /**
      * A list of all generated completions.
      */
@@ -69,6 +90,14 @@ public void setModel(String model) {
         this.model = model;
     }
 
+    public String getServiceTier() {
+        return serviceTier;
+    }
+
+    public void setServiceTier(String serviceTier) {
+        this.serviceTier = serviceTier;
+    }
+
     public List<ChatCompletionChoice> getChoices() {
         return choices;
     }
@@ -92,6 +121,7 @@ public String toString() {
                 ", object='" + object + '\'' +
                 ", created=" + created +
                 ", model='" + model + '\'' +
+                ", service_tier='" + serviceTier + '\'' +
                 ", choices=" + choices +
                 ", usage=" + usage +
                 '}';
diff --git a/volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionRequest.java b/volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionRequest.java
@@ -50,7 +50,20 @@ public class ChatCompletionRequest {
 
     /**
      * Specifies the latency tier to use for processing the request.
-     * This parameter is relevant for customers subscribed to the scale tier service
+     *
+     *     This parameter is relevant for customers subscribed to the scale tier service:
+     *
+     *     - If set to 'auto', and the endpoint is Scale tier enabled, the system will
+     *       utilize scale tier credits until they are exhausted.
+     *     - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
+     *       be processed using the default service tier with a lower uptime SLA and no
+     *       latency guarantee.
+     *     - If set to 'default', the request will be processed using the default service
+     *       tier with a lower uptime SLA and no latency guarantee.
+     *     - When not set, the default behavior is 'auto'.
+     *
+     *     When this parameter is set, the response body will include the `service_tier`
+     *     utilized.
      */
     @JsonProperty("service_tier")
     String serviceTier;
@@ -127,6 +140,11 @@ public class ChatCompletionRequest {
      */
     Integer n;
 
+    /** Whether to enable parallel function calling during tool use. */
+    // Wire name pinned explicitly, like the neighbouring service_tier and tool_choice fields.
+    @JsonProperty("parallel_tool_calls")
+    Boolean parallelToolCalls;
+
     @JsonProperty("tool_choice")
     Object toolChoice;
 
@@ -292,6 +310,14 @@ public void setN(Integer n) {
         this.n = n;
     }
 
+    public Boolean getParallelToolCalls() {
+        return parallelToolCalls;
+    }
+
+    public void setParallelToolCalls(Boolean parallelToolCalls) {
+        this.parallelToolCalls = parallelToolCalls;
+    }
+
     public Object getToolChoice() {
         return toolChoice;
     }
@@ -330,6 +356,7 @@ public String toString() {
                 ", topLogprobs=" + topLogprobs +
                 ", repetitionPenalty=" + repetitionPenalty +
                 ", n=" + n +
+                ", parallelToolCalls=" + parallelToolCalls +
                 ", toolChoice=" + toolChoice +
                 ", responseFormat=" + responseFormat +
                 '}';
@@ -473,6 +500,7 @@ public static class Builder {
         private Integer topLogprobs;
         private Double repetitionPenalty;
         private Integer n;
+        private Boolean parallelToolCalls;
         private Object toolChoice;
         private ChatCompletionRequestResponseFormat responseFormat;
 
@@ -571,6 +599,11 @@ public ChatCompletionRequest.Builder n(Integer n) {
             return this;
         }
 
+        public ChatCompletionRequest.Builder parallelToolCalls(Boolean parallelToolCalls) {
+            this.parallelToolCalls = parallelToolCalls;
+            return this;
+        }
+
         public ChatCompletionRequest.Builder toolChoice(String toolChoice) {
             this.toolChoice = toolChoice;
             return this;
@@ -607,6 +640,7 @@ public ChatCompletionRequest build() {
             chatCompletionRequest.setTopLogprobs(topLogprobs);
             chatCompletionRequest.setRepetitionPenalty(repetitionPenalty);
             chatCompletionRequest.setN(n);
+            chatCompletionRequest.setParallelToolCalls(parallelToolCalls);
             chatCompletionRequest.setToolChoice(toolChoice);
             chatCompletionRequest.setResponseFormat(responseFormat);
             return chatCompletionRequest;
diff --git a/volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionResult.java b/volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionResult.java
@@ -30,7 +30,21 @@ public class ChatCompletionResult {
     String model;
 
     /**
-     * The service tier used for processing the request.
+     * Specifies the latency tier to use for processing the request.
+     *
+     *     This parameter is relevant for customers subscribed to the scale tier service:
+     *
+     *     - If set to 'auto', and the endpoint is Scale tier enabled, the system will
+     *       utilize scale tier credits until they are exhausted.
+     *     - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
+     *       be processed using the default service tier with a lower uptime SLA and no
+     *       latency guarantee.
+     *     - If set to 'default', the request will be processed using the default service
+     *       tier with a lower uptime SLA and no latency guarantee.
+     *     - When not set, the default behavior is 'auto'.
+     *
+     *     When this parameter is set, the response body will include the `service_tier`
+     *     utilized.
      */
     @JsonAlias("service_tier")
     String serviceTier;
@@ -108,6 +122,7 @@ public String toString() {
                 ", object='" + object + '\'' +
                 ", created=" + created +
                 ", model='" + model + '\'' +
+                ", service_tier='" + serviceTier + '\'' +
                 ", choices=" + choices +
                 ", usage=" + usage +
                 '}';