Skip to content

Commit 3365a67

Browse files
songguocola, kevinlin09
authored and committed
feat(model/qwen-tts): add param language_type
1 parent a40abfe commit 3365a67

File tree

8 files changed

+107
-0
lines changed

8 files changed

+107
-0
lines changed

src/main/java/com/alibaba/dashscope/aigc/multimodalconversation/MultiModalConversationParam.java

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,9 @@ public class MultiModalConversationParam extends HalfDuplexServiceParam {
147147
/** number of images */
148148
private Integer n;
149149

150+
/** language type for tts */
151+
private String languageType;
152+
150153
@Override
151154
public JsonObject getHttpBody() {
152155
JsonObject requestObject = new JsonObject();
@@ -172,6 +175,10 @@ public JsonObject getInput() {
172175
jsonObject.addProperty(ApiKeywords.VOICE, voice.getValue());
173176
}
174177

178+
if (languageType != null) {
179+
jsonObject.addProperty(ApiKeywords.LANGUAGE_TYPE, languageType);
180+
}
181+
175182
return jsonObject;
176183
}
177184

src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConfig.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ public class QwenTtsRealtimeConfig {
2323
QwenTtsRealtimeAudioFormat responseFormat = QwenTtsRealtimeAudioFormat.PCM_24000HZ_MONO_16BIT;
2424
/** mode */
2525
@Builder.Default String mode = "server_commit";
26+
/** languageType for tts */
27+
@Builder.Default String languageType = null;
2628
/** The extra parameters. */
2729
@Builder.Default Map<String, Object> parameters = null;
2830

@@ -32,6 +34,9 @@ public JsonObject getConfig() {
3234
config.put(QwenTtsRealtimeConstants.MODE, mode);
3335
config.put(QwenTtsRealtimeConstants.RESPONSE_FORMAT, responseFormat.getFormat());
3436
config.put(QwenTtsRealtimeConstants.SAMPLE_RATE, responseFormat.getSampleRate());
37+
if (languageType != null) {
38+
config.put(QwenTtsRealtimeConstants.LANGUAGE_TYPE,languageType);
39+
}
3540
if (parameters != null) {
3641
for (Map.Entry<String, Object> entry : parameters.entrySet()) {
3742
config.put(entry.getKey(), entry.getValue());

src/main/java/com/alibaba/dashscope/audio/qwen_tts_realtime/QwenTtsRealtimeConstants.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ public class QwenTtsRealtimeConstants {
99
public static final String MODE = "mode";
1010
public static final String RESPONSE_FORMAT = "response_format";
1111
public static final String SAMPLE_RATE = "sample_rate";
12+
public static final String LANGUAGE_TYPE = "language_type";
1213
public static final String PROTOCOL_EVENT_ID = "event_id";
1314
public static final String PROTOCOL_TYPE = "type";
1415
public static final String PROTOCOL_SESSION = "session";

src/main/java/com/alibaba/dashscope/audio/tts/timestamp/Sentence.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@ public class Sentence {
1818
@SerializedName("end_time")
1919
int endTime;
2020

21+
@SerializedName("index")
22+
int index;
2123
/** Sentence words. */
24+
@SerializedName("words")
2225
List<Word> words;
2326

2427
public static Sentence from(String message) {

src/main/java/com/alibaba/dashscope/audio/tts/timestamp/Word.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ public class Word {
1616
@SerializedName("end_time")
1717
int endTime;
1818

19+
@SerializedName("begin_index")
20+
int beginIndex;
21+
22+
@SerializedName("end_index")
23+
int endIndex;
24+
1925
/** Word. */
2026
String text;
2127

src/main/java/com/alibaba/dashscope/utils/ApiKeywords.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,4 +181,6 @@ public class ApiKeywords {
181181
public static final String WATERMARK = "watermark";
182182

183183
public static final String ANNOTATIONS = "annotations";
184+
185+
public static final String LANGUAGE_TYPE = "language_type";
184186
}
src/test/java/com/alibaba/dashscope/TestMultiModalConversationQwenTTS.java

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright (c) Alibaba, Inc. and its affiliates.
2+
3+
package com.alibaba.dashscope;
4+
5+
import com.alibaba.dashscope.aigc.multimodalconversation.*;
6+
import com.alibaba.dashscope.exception.ApiException;
7+
import com.alibaba.dashscope.exception.InputRequiredException;
8+
import com.alibaba.dashscope.exception.NoApiKeyException;
9+
import com.alibaba.dashscope.exception.UploadFileException;
10+
import com.alibaba.dashscope.utils.Constants;
11+
import lombok.extern.slf4j.Slf4j;
12+
import okhttp3.MediaType;
13+
import okhttp3.mockwebserver.MockResponse;
14+
import okhttp3.mockwebserver.MockWebServer;
15+
import okhttp3.mockwebserver.RecordedRequest;
16+
import okio.ByteString;
17+
import org.junit.jupiter.api.AfterEach;
18+
import org.junit.jupiter.api.BeforeEach;
19+
import org.junit.jupiter.api.Test;
20+
import org.junit.jupiter.api.parallel.Execution;
21+
import org.junit.jupiter.api.parallel.ExecutionMode;
22+
import org.junitpioneer.jupiter.SetEnvironmentVariable;
23+
import static org.junit.jupiter.api.Assertions.assertNotNull;
24+
import static org.junit.jupiter.api.Assertions.assertTrue;
25+
import java.io.IOException;
26+
27+
@Execution(ExecutionMode.SAME_THREAD)
28+
@Slf4j
29+
@SetEnvironmentVariable(key = "DASHSCOPE_API_KEY", value = "1234")
30+
public class TestMultiModalConversationQwenTTS {
31+
private static final MediaType MEDIA_TYPE_APPLICATION_JSON =
32+
MediaType.parse("application/json; charset=utf-8");
33+
MockWebServer server;
34+
35+
@BeforeEach
36+
public void before() throws IOException {
37+
38+
this.server = new MockWebServer();
39+
this.server.start();
40+
41+
String responseStr =
42+
"{\"output\": {\"audio\": {\"data\": \"\", \"expires_at\": 1758187426, \"id\": \"audio_d8ab01f8-2793-4f65-a656-664e6e6c0d19\", \"url\": \"http://dashscope-result.demo.reuslt/abc\"}, \"finish_reason\": \"stop\"}, \"usage\": {\"characters\": 56}, \"request_id\": \"d8ab01f8-2793-4f65-a656-664e6e6c0d19\"}";
43+
server.enqueue(
44+
new MockResponse()
45+
.setBody(responseStr)
46+
.setHeader("content-type", MEDIA_TYPE_APPLICATION_JSON));
47+
}
48+
49+
@AfterEach
50+
public void after() throws IOException {
51+
server.close();
52+
}
53+
54+
@Test
55+
@SetEnvironmentVariable(key = "DASHSCOPE_NETWORK_LOGGING_LEVEL", value = "HEADERS")
56+
public void testSendAndReceive()
57+
throws ApiException, NoApiKeyException, IOException, InterruptedException,
58+
InputRequiredException, UploadFileException {
59+
int port = server.getPort();
60+
Constants.baseHttpApiUrl = String.format("http://127.0.0.1:%s", port);
61+
MultiModalConversation conv = new MultiModalConversation();
62+
63+
MultiModalConversationParam param =
64+
MultiModalConversationParam.builder()
65+
.model("qwen-tts-latest")
66+
.text("Today is a wonderful day to build something people love!")
67+
.voice(AudioParameters.Voice.DYLAN)
68+
.languageType("zh")
69+
.build();
70+
MultiModalConversationResult result = conv.call(param);
71+
RecordedRequest request = this.server.takeRequest();
72+
String requestBody = request.getBody().readUtf8();
73+
74+
// Assert that the request body contains the language_type
75+
assertTrue(requestBody.contains("\"language_type\":\"zh\""));
76+
77+
// Assert properties of the result
78+
assertNotNull(result);
79+
assertNotNull(result.getOutput());
80+
assertNotNull(result.getOutput().getAudio());
81+
}
82+
}

src/test/java/com/alibaba/dashscope/TestQwenTtsRealtime.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ public void onClose(int code, String reason) {
132132
QwenTtsRealtimeConfig.builder()
133133
.voice("Chelsie")
134134
.mode("commit")
135+
.languageType("zh")
135136
.build();
136137
ttsRealtime.updateSession(config);
137138
ttsRealtime.appendText("你好");

0 commit comments

Comments
 (0)