Skip to content

Commit 22a5c00

Browse files
author
hexiaochun
committed
feat: add parallel_tool_calls
1 parent f5903ea commit 22a5c00

File tree

3 files changed

+81
-2
lines changed

3 files changed

+81
-2
lines changed

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionChunk.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.volcengine.ark.runtime.model.completion.chat;
22

3+
import com.fasterxml.jackson.annotation.JsonAlias;
34
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
45
import com.volcengine.ark.runtime.model.Usage;
56

@@ -27,6 +28,26 @@ public class ChatCompletionChunk {
2728
*/
2829
String model;
2930

31+
/**
32+
* Specifies the latency tier to use for processing the request.
33+
*
34+
* This parameter is relevant for customers subscribed to the scale tier service:
35+
*
36+
* - If set to 'auto', and the endpoint is Scale tier enabled, the system will
37+
* utilize scale tier credits until they are exhausted.
38+
* - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
39+
* be processed using the default service tier with a lower uptime SLA and no
40+
* latency guarantee.
41+
* - If set to 'default', the request will be processed using the default service
42+
* tier with a lower uptime SLA and no latency guarantee.
43+
* - When not set, the default behavior is 'auto'.
44+
*
45+
* When this parameter is set, the response body will include the `service_tier`
46+
* utilized.
47+
*/
48+
@JsonAlias("service_tier")
49+
String serviceTier;
50+
3051
/**
3152
* A list of all generated completions.
3253
*/
@@ -69,6 +90,14 @@ public void setModel(String model) {
6990
this.model = model;
7091
}
7192

93+
public String getServiceTier() {
94+
return serviceTier;
95+
}
96+
97+
public void setServiceTier(String serviceTier) {
98+
this.serviceTier = serviceTier;
99+
}
100+
72101
public List<ChatCompletionChoice> getChoices() {
73102
return choices;
74103
}
@@ -92,6 +121,7 @@ public String toString() {
92121
", object='" + object + '\'' +
93122
", created=" + created +
94123
", model='" + model + '\'' +
124+
", service_tier='" + serviceTier + '\'' +
95125
", choices=" + choices +
96126
", usage=" + usage +
97127
'}';

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionRequest.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,20 @@ public class ChatCompletionRequest {
5050

5151
/**
5252
* Specifies the latency tier to use for processing the request.
53-
* This parameter is relevant for customers subscribed to the scale tier service
53+
*
54+
* This parameter is relevant for customers subscribed to the scale tier service:
55+
*
56+
* - If set to 'auto', and the endpoint is Scale tier enabled, the system will
57+
* utilize scale tier credits until they are exhausted.
58+
* - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
59+
* be processed using the default service tier with a lower uptime SLA and no
60+
* latency guarantee.
61+
* - If set to 'default', the request will be processed using the default service
62+
* tier with a lower uptime SLA and no latency guarantee.
63+
* - When not set, the default behavior is 'auto'.
64+
*
65+
* When this parameter is set, the response body will include the `service_tier`
66+
* utilized.
5467
*/
5568
@JsonProperty("service_tier")
5669
String serviceTier;
@@ -127,6 +140,11 @@ public class ChatCompletionRequest {
127140
*/
128141
Integer n;
129142

143+
/**
144+
* Whether to enable parallel function calling during tool use.
145+
*/
146+
Boolean parallelToolCalls;
147+
130148
@JsonProperty("tool_choice")
131149
Object toolChoice;
132150

@@ -292,6 +310,14 @@ public void setN(Integer n) {
292310
this.n = n;
293311
}
294312

313+
public Boolean getParallelToolCalls() {
314+
return parallelToolCalls;
315+
}
316+
317+
public void setParallelToolCalls(Boolean parallelToolCalls) {
318+
this.parallelToolCalls = parallelToolCalls;
319+
}
320+
295321
public Object getToolChoice() {
296322
return toolChoice;
297323
}
@@ -330,6 +356,7 @@ public String toString() {
330356
", topLogprobs=" + topLogprobs +
331357
", repetitionPenalty=" + repetitionPenalty +
332358
", n=" + n +
359+
", parallelToolCalls=" + parallelToolCalls +
333360
", toolChoice=" + toolChoice +
334361
", responseFormat=" + responseFormat +
335362
'}';
@@ -473,6 +500,7 @@ public static class Builder {
473500
private Integer topLogprobs;
474501
private Double repetitionPenalty;
475502
private Integer n;
503+
private Boolean parallelToolCalls;
476504
private Object toolChoice;
477505
private ChatCompletionRequestResponseFormat responseFormat;
478506

@@ -571,6 +599,11 @@ public ChatCompletionRequest.Builder n(Integer n) {
571599
return this;
572600
}
573601

602+
public ChatCompletionRequest.Builder parallelToolCalls(Boolean parallelToolCalls) {
603+
this.parallelToolCalls = parallelToolCalls;
604+
return this;
605+
}
606+
574607
public ChatCompletionRequest.Builder toolChoice(String toolChoice) {
575608
this.toolChoice = toolChoice;
576609
return this;
@@ -607,6 +640,7 @@ public ChatCompletionRequest build() {
607640
chatCompletionRequest.setTopLogprobs(topLogprobs);
608641
chatCompletionRequest.setRepetitionPenalty(repetitionPenalty);
609642
chatCompletionRequest.setN(n);
643+
chatCompletionRequest.setParallelToolCalls(parallelToolCalls);
610644
chatCompletionRequest.setToolChoice(toolChoice);
611645
chatCompletionRequest.setResponseFormat(responseFormat);
612646
return chatCompletionRequest;

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionResult.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,21 @@ public class ChatCompletionResult {
3030
String model;
3131

3232
/**
33-
* The service tier used for processing the request.
33+
* Specifies the latency tier to use for processing the request.
34+
*
35+
* This parameter is relevant for customers subscribed to the scale tier service:
36+
*
37+
* - If set to 'auto', and the endpoint is Scale tier enabled, the system will
38+
* utilize scale tier credits until they are exhausted.
39+
* - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
40+
* be processed using the default service tier with a lower uptime SLA and no
41+
* latency guarantee.
42+
* - If set to 'default', the request will be processed using the default service
43+
* tier with a lower uptime SLA and no latency guarantee.
44+
* - When not set, the default behavior is 'auto'.
45+
*
46+
* When this parameter is set, the response body will include the `service_tier`
47+
* utilized.
3448
*/
3549
@JsonAlias("service_tier")
3650
String serviceTier;
@@ -108,6 +122,7 @@ public String toString() {
108122
", object='" + object + '\'' +
109123
", created=" + created +
110124
", model='" + model + '\'' +
125+
", service_tier='" + serviceTier + '\'' +
111126
", choices=" + choices +
112127
", usage=" + usage +
113128
'}';

0 commit comments

Comments
 (0)