Skip to content

Commit 40fe333

Browse files
author
BitsAdmin
committed
Merge branch 'fix/tool_call' into 'integration_2025-03-06_779226727170'
feat: [development task] ark-runtime-manual-Java (1060704) See merge request iaasng/volcengine-java-sdk!404
2 parents f5b1657 + 22a5c00 commit 40fe333

File tree

5 files changed

+140
-2
lines changed

5 files changed

+140
-2
lines changed

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionChunk.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.volcengine.ark.runtime.model.completion.chat;
22

3+
import com.fasterxml.jackson.annotation.JsonAlias;
34
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
45
import com.volcengine.ark.runtime.model.Usage;
56

@@ -27,6 +28,26 @@ public class ChatCompletionChunk {
2728
*/
2829
String model;
2930

31+
/**
32+
* Specifies the latency tier to use for processing the request.
33+
*
34+
* This parameter is relevant for customers subscribed to the scale tier service:
35+
*
36+
* - If set to 'auto', and the endpoint is Scale tier enabled, the system will
37+
* utilize scale tier credits until they are exhausted.
38+
* - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
39+
* be processed using the default service tier with a lower uptime SLA and no
40+
* latency guarantee.
41+
* - If set to 'default', the request will be processed using the default service
42+
* tier with a lower uptime SLA and no latency guarantee.
43+
* - When not set, the default behavior is 'auto'.
44+
*
45+
* When this parameter is set, the response body will include the `service_tier`
46+
* utilized.
47+
*/
48+
@JsonAlias("service_tier")
49+
String serviceTier;
50+
3051
/**
3152
* A list of all generated completions.
3253
*/
@@ -69,6 +90,14 @@ public void setModel(String model) {
6990
this.model = model;
7091
}
7192

93+
public String getServiceTier() {
94+
return serviceTier;
95+
}
96+
97+
public void setServiceTier(String serviceTier) {
98+
this.serviceTier = serviceTier;
99+
}
100+
72101
public List<ChatCompletionChoice> getChoices() {
73102
return choices;
74103
}
@@ -92,6 +121,7 @@ public String toString() {
92121
", object='" + object + '\'' +
93122
", created=" + created +
94123
", model='" + model + '\'' +
124+
", service_tier='" + serviceTier + '\'' +
95125
", choices=" + choices +
96126
", usage=" + usage +
97127
'}';

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionRequest.java

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,26 @@ public class ChatCompletionRequest {
4848
@JsonProperty("stream_options")
4949
ChatCompletionRequestStreamOptions streamOptions;
5050

51+
/**
52+
* Specifies the latency tier to use for processing the request.
53+
*
54+
* This parameter is relevant for customers subscribed to the scale tier service:
55+
*
56+
* - If set to 'auto', and the endpoint is Scale tier enabled, the system will
57+
* utilize scale tier credits until they are exhausted.
58+
* - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
59+
* be processed using the default service tier with a lower uptime SLA and no
60+
* latency guarantee.
61+
* - If set to 'default', the request will be processed using the default service
62+
* tier with a lower uptime SLA and no latency guarantee.
63+
* - When not set, the default behavior is 'auto'.
64+
*
65+
* When this parameter is set, the response body will include the `service_tier`
66+
* utilized.
67+
*/
68+
@JsonProperty("service_tier")
69+
String serviceTier;
70+
5171
/**
5272
* Up to 4 sequences where the API will stop generating further tokens.
5373
*/
@@ -120,6 +140,11 @@ public class ChatCompletionRequest {
120140
*/
121141
Integer n;
122142

143+
/**
144+
* Whether to enable parallel function calling during tool use.
145+
*/
146+
Boolean parallelToolCalls;
147+
123148
@JsonProperty("tool_choice")
124149
Object toolChoice;
125150

@@ -181,6 +206,14 @@ public void setStreamOptions(ChatCompletionRequestStreamOptions streamOptions) {
181206
this.streamOptions = streamOptions;
182207
}
183208

209+
public String getServiceTier() {
210+
return serviceTier;
211+
}
212+
213+
public void setServiceTier(String serviceTier) {
214+
this.serviceTier = serviceTier;
215+
}
216+
184217
public List<String> getStop() {
185218
return stop;
186219
}
@@ -277,6 +310,14 @@ public void setN(Integer n) {
277310
this.n = n;
278311
}
279312

313+
public Boolean getParallelToolCalls() {
314+
return parallelToolCalls;
315+
}
316+
317+
public void setParallelToolCalls(Boolean parallelToolCalls) {
318+
this.parallelToolCalls = parallelToolCalls;
319+
}
320+
280321
public Object getToolChoice() {
281322
return toolChoice;
282323
}
@@ -302,6 +343,7 @@ public String toString() {
302343
", topP=" + topP +
303344
", stream=" + stream +
304345
", streamOptions=" + streamOptions +
346+
", serviceTier=" + serviceTier +
305347
", stop=" + stop +
306348
", maxTokens=" + maxTokens +
307349
", presencePenalty=" + presencePenalty +
@@ -314,6 +356,7 @@ public String toString() {
314356
", topLogprobs=" + topLogprobs +
315357
", repetitionPenalty=" + repetitionPenalty +
316358
", n=" + n +
359+
", parallelToolCalls=" + parallelToolCalls +
317360
", toolChoice=" + toolChoice +
318361
", responseFormat=" + responseFormat +
319362
'}';
@@ -444,6 +487,7 @@ public static class Builder {
444487
private Double topP;
445488
private Boolean stream;
446489
private ChatCompletionRequestStreamOptions streamOptions;
490+
private String serviceTier;
447491
private List<String> stop;
448492
private Integer maxTokens;
449493
private Double presencePenalty;
@@ -456,6 +500,7 @@ public static class Builder {
456500
private Integer topLogprobs;
457501
private Double repetitionPenalty;
458502
private Integer n;
503+
private Boolean parallelToolCalls;
459504
private Object toolChoice;
460505
private ChatCompletionRequestResponseFormat responseFormat;
461506

@@ -489,6 +534,11 @@ public ChatCompletionRequest.Builder streamOptions(ChatCompletionRequestStreamOp
489534
return this;
490535
}
491536

537+
public ChatCompletionRequest.Builder serviceTier(String serviceTier) {
538+
this.serviceTier = serviceTier;
539+
return this;
540+
}
541+
492542
public ChatCompletionRequest.Builder stop(List<String> stop) {
493543
this.stop = stop;
494544
return this;
@@ -549,6 +599,11 @@ public ChatCompletionRequest.Builder n(Integer n) {
549599
return this;
550600
}
551601

602+
public ChatCompletionRequest.Builder parallelToolCalls(Boolean parallelToolCalls) {
603+
this.parallelToolCalls = parallelToolCalls;
604+
return this;
605+
}
606+
552607
public ChatCompletionRequest.Builder toolChoice(String toolChoice) {
553608
this.toolChoice = toolChoice;
554609
return this;
@@ -572,6 +627,7 @@ public ChatCompletionRequest build() {
572627
chatCompletionRequest.setTopP(topP);
573628
chatCompletionRequest.setStream(stream);
574629
chatCompletionRequest.setStreamOptions(streamOptions);
630+
chatCompletionRequest.setServiceTier(serviceTier);
575631
chatCompletionRequest.setStop(stop);
576632
chatCompletionRequest.setMaxTokens(maxTokens);
577633
chatCompletionRequest.setPresencePenalty(presencePenalty);
@@ -584,6 +640,7 @@ public ChatCompletionRequest build() {
584640
chatCompletionRequest.setTopLogprobs(topLogprobs);
585641
chatCompletionRequest.setRepetitionPenalty(repetitionPenalty);
586642
chatCompletionRequest.setN(n);
643+
chatCompletionRequest.setParallelToolCalls(parallelToolCalls);
587644
chatCompletionRequest.setToolChoice(toolChoice);
588645
chatCompletionRequest.setResponseFormat(responseFormat);
589646
return chatCompletionRequest;

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatCompletionResult.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.volcengine.ark.runtime.model.completion.chat;
22

3+
import com.fasterxml.jackson.annotation.JsonAlias;
34
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
45
import com.volcengine.ark.runtime.model.Usage;
56

@@ -28,6 +29,26 @@ public class ChatCompletionResult {
2829
*/
2930
String model;
3031

32+
/**
33+
* Specifies the latency tier to use for processing the request.
34+
*
35+
* This parameter is relevant for customers subscribed to the scale tier service:
36+
*
37+
* - If set to 'auto', and the endpoint is Scale tier enabled, the system will
38+
* utilize scale tier credits until they are exhausted.
39+
* - If set to 'auto', and the endpoint is not Scale tier enabled, the request will
40+
* be processed using the default service tier with a lower uptime SLA and no
41+
* latency guarantee.
42+
* - If set to 'default', the request will be processed using the default service
43+
* tier with a lower uptime SLA and no latency guarantee.
44+
* - When not set, the default behavior is 'auto'.
45+
*
46+
* When this parameter is set, the response body will include the `service_tier`
47+
* utilized.
48+
*/
49+
@JsonAlias("service_tier")
50+
String serviceTier;
51+
3152
/**
3253
* A list of all generated completions.
3354
*/
@@ -70,6 +91,14 @@ public void setModel(String model) {
7091
this.model = model;
7192
}
7293

94+
public String getServiceTier() {
95+
return serviceTier;
96+
}
97+
98+
public void setServiceTier(String serviceTier) {
99+
this.serviceTier = serviceTier;
100+
}
101+
73102
public List<ChatCompletionChoice> getChoices() {
74103
return choices;
75104
}
@@ -93,6 +122,7 @@ public String toString() {
93122
", object='" + object + '\'' +
94123
", created=" + created +
95124
", model='" + model + '\'' +
125+
", service_tier='" + serviceTier + '\'' +
96126
", choices=" + choices +
97127
", usage=" + usage +
98128
'}';

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/completion/chat/ChatToolCall.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,19 @@ public class ChatToolCall {
2020
*/
2121
ChatFunctionCall function;
2222

23+
/**
24+
* The index of the tool call in the list of tool calls
25+
*/
26+
Integer index;
27+
2328
public ChatToolCall(String id, String type, ChatFunctionCall function) {
2429
this.id = id;
2530
this.type = type;
2631
this.function = function;
2732
}
2833

29-
public ChatToolCall(){}
34+
public ChatToolCall() {
35+
}
3036

3137
public String getId() {
3238
return id;
@@ -52,12 +58,21 @@ public void setFunction(ChatFunctionCall function) {
5258
this.function = function;
5359
}
5460

61+
public Integer getIndex() {
62+
return index;
63+
}
64+
65+
public void setIndex(Integer index) {
66+
this.index = index;
67+
}
68+
5569
@Override
5670
public String toString() {
5771
return "ChatToolCall{" +
5872
"id='" + id + '\'' +
5973
", type='" + type + '\'' +
6074
", function=" + function +
75+
", index=" + index +
6176
'}';
6277
}
6378
}

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/service/ArkService.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,7 @@ public static class Builder {
339339
private String region = BASE_REGION;
340340
private String baseUrl = BASE_URL;
341341
private Duration timeout = DEFAULT_TIMEOUT;
342+
private Duration callTimeout;
342343
private Duration connectTimeout = DEFAULT_CONNECT_TIMEOUT;
343344
private int retryTimes = DEFAULT_RETRY_TIMES;
344345
private Proxy proxy;
@@ -379,6 +380,11 @@ public ArkService.Builder timeout(Duration timeout) {
379380
return this;
380381
}
381382

383+
public ArkService.Builder callTimeout(Duration callTimeout) {
384+
this.callTimeout = callTimeout;
385+
return this;
386+
}
387+
382388
public ArkService.Builder connectTimeout(Duration connectTimeout) {
383389
this.connectTimeout = connectTimeout;
384390
return this;
@@ -439,7 +445,7 @@ public ArkService build() {
439445
.addInterceptor(new RetryInterceptor(retryTimes))
440446
.addInterceptor(new BatchInterceptor())
441447
.readTimeout(timeout.toMillis(), TimeUnit.MILLISECONDS)
442-
.callTimeout(timeout.toMillis(), TimeUnit.MILLISECONDS)
448+
.callTimeout(callTimeout == null ? timeout.toMillis() : callTimeout.toMillis(), TimeUnit.MILLISECONDS)
443449
.connectTimeout(connectTimeout)
444450
.build();
445451
Retrofit retrofit = defaultRetrofit(client, mapper, baseUrl, callbackExecutor);

0 commit comments

Comments
 (0)