Skip to content

Commit 06b1618

Browse files
songguocola authored and kevinlin09 committed
feat(model/qwen-asr,model/cosyvoice-v3): support more parameters
1 parent fac5fb5 commit 06b1618

File tree

7 files changed

+105
-0
lines changed

7 files changed

+105
-0
lines changed

samples/QwenASRUsage.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
2+
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
3+
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
4+
import com.alibaba.dashscope.common.MultiModalMessage;
5+
import com.alibaba.dashscope.common.Role;
6+
import com.alibaba.dashscope.exception.ApiException;
7+
import com.alibaba.dashscope.exception.NoApiKeyException;
8+
import com.alibaba.dashscope.exception.UploadFileException;
9+
import io.reactivex.Flowable;
10+
11+
import java.util.Arrays;
12+
import java.util.HashMap;
13+
14+
public class QwenASRUsage {
15+
private static final String modelName = "qwen3-asr-flash";
16+
public static void streamCall()
17+
throws ApiException, NoApiKeyException, UploadFileException {
18+
MultiModalConversation conv = new MultiModalConversation();
19+
// must create mutable map.
20+
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
21+
.content(Arrays.asList(new HashMap<String, Object>(){{put("audio", "https://dashscope.oss-cn-beijing.aliyuncs.com/audios/welcome.mp3");}}
22+
)).build();
23+
MultiModalConversationParam param = MultiModalConversationParam.builder()
24+
.model(modelName)
25+
.message(userMessage)
26+
.parameter("asr_options", new HashMap<String, Object>(){{put("enable_lid", true); put("language", "zh");}})
27+
.incrementalOutput(true)
28+
.build();
29+
Flowable<MultiModalConversationResult> result = conv.streamCall(param);
30+
result.blockingForEach(item -> {
31+
if (item.getOutput() == null) {
32+
return;
33+
}
34+
try {
35+
System.out.println(item.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
36+
} catch (Exception e){
37+
System.exit(0);
38+
}
39+
});
40+
}
41+
public static void main(String[] args) {
42+
try {
43+
streamCall();
44+
} catch (ApiException | NoApiKeyException | UploadFileException e) {
45+
System.out.println(e.getMessage());
46+
}
47+
System.exit(0);
48+
}
49+
}

src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationUsage.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ public class MultiModalConversationUsage {
3232
@SerializedName("height")
3333
private Integer height;
3434

35+
@SerializedName("seconds")
36+
private Integer seconds;
37+
3538
@SerializedName("input_tokens_details")
3639
private MultiModalConversationTokensDetails inputTokensDetails;
3740

src/main/java/com/alibaba/dashscope/audio/tts/SpeechSynthesisApiKeywords.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,12 @@ public class SpeechSynthesisApiKeywords {
3434
public static final String TIMESTAMP_TEXT = "text";
3535

3636
public static final String TONE = "tone";
37+
38+
public static final String INSTRUCTION = "instruction";
39+
40+
public static final String SEED = "seed";
41+
42+
public static final String LANGUAGE_HINTS = "language_hints";
43+
44+
public static final String STYLE = "style";
3745
}

src/main/java/com/alibaba/dashscope/audio/ttsv2/SpeechSynthesisParam.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import lombok.experimental.SuperBuilder;
1111

1212
import java.util.HashMap;
13+
import java.util.List;
1314
import java.util.Map;
1415

1516
/** @author lengjiayi */
@@ -44,6 +45,19 @@ public class SpeechSynthesisParam extends FullDuplexServiceParam {
4445
@Builder.Default private long connectionTimeout = -1;
4546
@Builder.Default private long firstPackageTimeout = -1;
4647

48+
/**
 * Instruction for synthesis.
 *
 * <p>This parameter and the ones declared after it (seed, language hints, style) take effect
 * only in CosyVoice V3 and later versions.
 */
52+
@Builder.Default private String instruction = null;
53+
/** random seed. */
54+
@Builder.Default private int seed = 0;
55+
/** language hints. */
56+
@Builder.Default private List<String> languageHints = null;
57+
/** synthesis style */
58+
@Builder.Default private int style = 0;
59+
60+
4761
@Override
4862
public Map<String, Object> getParameters() {
4963
Map<String, Object> params = new HashMap<>();
@@ -59,6 +73,18 @@ public Map<String, Object> getParameters() {
5973
// Compare the format name by value: `==` on strings tests reference identity and only works
// when both sides happen to be the same interned instance. The constant-first form is null-safe.
if ("opus".equals(getFormat().getFormat())) {
  params.put(SpeechSynthesisApiKeywords.BIT_RATE, getFormat().getBitRate());
}
76+
if (getInstruction() != null) {
77+
params.put(SpeechSynthesisApiKeywords.INSTRUCTION, getInstruction());
78+
}
79+
if (getSeed() != 0) {
80+
params.put(SpeechSynthesisApiKeywords.SEED, getSeed());
81+
}
82+
if (getLanguageHints() != null) {
83+
params.put(SpeechSynthesisApiKeywords.LANGUAGE_HINTS, getLanguageHints());
84+
}
85+
if (getStyle() != 0) {
86+
params.put(SpeechSynthesisApiKeywords.STYLE, getStyle());
87+
}
6288
params.putAll(parameters);
6389
return params;
6490
}

src/main/java/com/alibaba/dashscope/common/MultiModalMessage.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,7 @@ public class MultiModalMessage {
3737

3838
/** chain of thought content */
3939
String reasoningContent;
40+
41+
/** annotations result for message */
42+
private List<Map<String, Object>> annotations;
4043
}

src/main/java/com/alibaba/dashscope/common/MultiModalMessageAdapter.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,15 @@ public void write(JsonWriter out, MultiModalMessage value) throws IOException {
109109
}
110110
out.endArray();
111111

112+
if (value.getAnnotations() != null) {
113+
out.name(ApiKeywords.ANNOTATIONS);
114+
out.beginArray();
115+
for (Map<String, Object> item : value.getAnnotations()) {
116+
writeMapObject(out, item);
117+
}
118+
out.endArray();
119+
}
120+
112121
if (value.getReasoningContent() != null) {
113122
out.name(ApiKeywords.REASONING_CONTENT);
114123
out.value(value.getReasoningContent());
@@ -157,6 +166,11 @@ public MultiModalMessage read(JsonReader in) throws IOException {
157166
objectMap.remove(ApiKeywords.CONTENT);
158167
}
159168

169+
if (objectMap.containsKey(ApiKeywords.ANNOTATIONS)) {
170+
msg.setAnnotations((List<Map<String, Object>>) objectMap.get(ApiKeywords.ANNOTATIONS));
171+
objectMap.remove(ApiKeywords.ANNOTATIONS);
172+
}
173+
160174
if (objectMap.containsKey(ApiKeywords.REASONING_CONTENT)) {
161175
String reasoningContent = (String) objectMap.get(ApiKeywords.REASONING_CONTENT);
162176
msg.setReasoningContent(reasoningContent);

src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,4 +179,6 @@ public class ApiKeywords {
179179
public static final String PROMPT_EXTEND = "prompt_extend";
180180

181181
public static final String WATERMARK = "watermark";
182+
183+
public static final String ANNOTATIONS = "annotations";
182184
}

0 commit comments

Comments
 (0)