Skip to content

Commit 0c4c10e

Browse files
songguocola authored and kevinlin09 committed
feat:app/multimodal-dialog: add upstream.asr_post_processing
1 parent ac58590 commit 0c4c10e

File tree

3 files changed: 89 additions and 13 deletions

3 files changed: 89 additions and 13 deletions

src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialog.java

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
package com.alibaba.dashscope.multimodal;
22

3+
import com.alibaba.dashscope.Version;
34
import com.alibaba.dashscope.api.SynchronizeFullDuplexApi;
45
import com.alibaba.dashscope.common.*;
56
import com.alibaba.dashscope.exception.ApiException;
@@ -8,6 +9,7 @@
89
import com.alibaba.dashscope.protocol.ApiServiceOption;
910
import com.alibaba.dashscope.protocol.Protocol;
1011
import com.alibaba.dashscope.protocol.StreamingMode;
12+
import com.alibaba.dashscope.utils.Constants;
1113
import com.alibaba.dashscope.utils.JsonUtils;
1214
import com.google.gson.JsonObject;
1315
import io.reactivex.BackpressureStrategy;
@@ -95,15 +97,16 @@ public Flowable<Object> getStreamingData() {
9597

9698
public static MultiModalRequestParamWithStream FromMultiModalParam(
9799
MultiModalRequestParam param, Flowable<Object> dataStream, String preRequestId) {
98-
100+
ClientInfo clientInfo = param.getClientInfo();
101+
clientInfo.setSdk("dashscope-sdk-java "+ Version.version);
99102
return MultiModalRequestParamWithStream.builder()
100103
.parameter("pre_task_id", preRequestId)
101104
.headers(param.getHeaders())
102105
.upStream(param.getUpStream())
103106
.customInput(param.getCustomInput())
104107
.bizParams(param.getBizParams())
105108
.downStream(param.getDownStream())
106-
.clientInfo(param.getClientInfo())
109+
.clientInfo(clientInfo)
107110
.dialogAttributes(param.getDialogAttributes())
108111
.images(param.getImages())
109112
.dataStream(dataStream)
@@ -402,6 +405,12 @@ public void updateInfo(MultiModalRequestParam.UpdateParams updateParams) {
402405
if (updateParams != null && updateParams.images != null) {
403406
requestParamWithStream.setImages(updateParams.images);
404407
}
408+
if (updateParams != null && updateParams.upStream != null) {
409+
requestParamWithStream.setUpStream(updateParams.upStream);
410+
}
411+
if (updateParams != null && updateParams.downStream != null) {
412+
requestParamWithStream.setDownStream(updateParams.downStream);
413+
}
405414
sendTextFrame("UpdateInfo");
406415
}
407416

src/main/java/com/alibaba/dashscope/multimodal/MultiModalDialogApiKeyWords.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ public class MultiModalDialogApiKeyWords {
1717
public static String CONST_NAME_UP_STREAM_AUDIO_FORMAT = "audio_format";
1818
public static String CONST_NAME_UP_STREAM_TYPE = "type";
1919
public static String CONST_NAME_UP_STREAM_MODE = "mode";
20+
public static String CONST_NAME_REPLACE_WORD_SOURCE = "source";
21+
public static String CONST_NAME_REPLACE_WORD_TARGET = "target";
22+
public static String CONST_NAME_REPLACE_WORD_MATCH_MODE = "match_mode";
23+
public static String CONST_NAME_REPLACE_WORDS = "replace_words";
24+
public static String CONST_NAME_ASR_POST_PROCESSING = "asr_post_processing";
25+
public static String CONST_NAME_VOCABULARY_ID = "vocabulary_id";
2026

2127
public static String CONST_NAME_DOWN_STREAM_VOICE = "voice";
2228
public static String CONST_NAME_DOWN_STREAM_SAMPLE_RATE = "sample_rate";
@@ -43,6 +49,7 @@ public class MultiModalDialogApiKeyWords {
4349
public static String CONST_NAME_CLIENT_INFO_LOCATION_LONGITUDE = "longitude";
4450
public static String CONST_NAME_CLIENT_INFO_LOCATION_CITY_NAME = "city_name";
4551
public static String CONST_NAME_CLIENT_INFO_ACTIVE_FOREGROUND_APP = "active_foreground_app";
52+
public static String CONST_NAME_CLIENT_INFO_SDK = "sdk";
4653

4754
public static String CONST_NAME_BIZ_PARAMS_USER_DEFINED_PARAMS = "user_defined_params";
4855
public static String CONST_NAME_BIZ_PARAMS_USER_DEFINED_TOKENS = "user_defined_tokens";

src/main/java/com/alibaba/dashscope/multimodal/MultiModalRequestParam.java

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@
77

88
import com.alibaba.dashscope.base.FullDuplexServiceParam;
99
import io.reactivex.Flowable;
10-
import lombok.Builder;
11-
import lombok.Data;
12-
import lombok.EqualsAndHashCode;
10+
import lombok.*;
1311
import lombok.experimental.SuperBuilder;
14-
import lombok.val;
12+
import org.jetbrains.annotations.NotNull;
1513

14+
import java.util.ArrayList;
1615
import java.util.HashMap;
1716
import java.util.List;
1817
import java.util.Map;
@@ -58,9 +57,25 @@ public Map<String, Object> getInputs() {
5857
public static class UpStream {
5958
private String type = "AudioOnly";
6059
private String mode;
61-
// private int sampleRate;
60+
private AsrPostProcessing asrPostProcessing;
61+
@Builder.Default private Integer sampleRate = 16000;
62+
private String vocabularyId = null;
6263
@Builder.Default private String audioFormat = CONST_AUDIO_FORMAT_PCM; //support pcm/opus
6364
private Map<String, Object> passThroughParams;
65+
66+
@Builder
67+
@Setter
68+
public static class AsrPostProcessing {
69+
private List<ReplaceWord> replaceWords;
70+
71+
@Builder
72+
@Setter
73+
public static class ReplaceWord {
74+
private String source;
75+
private String target;
76+
private String matchMode;
77+
}
78+
}
6479
}
6580

6681
@Builder
@@ -83,6 +98,7 @@ public static class DialogAttributes {
8398
}
8499

85100
@Builder
101+
@Setter
86102
public static class ClientInfo {
87103
private String userId;
88104
private Device device;
@@ -91,6 +107,7 @@ public static class ClientInfo {
91107
private Object status;
92108
private String activeForegroundApp;
93109
private Map<String, Object> passThroughParams;
110+
private String sdk;
94111

95112
@Builder
96113
public static class Network {
@@ -133,6 +150,8 @@ public void clearParameters() {
133150

134151
@Builder
135152
public static class UpdateParams {
153+
UpStream upStream;
154+
DownStream downStream;
136155
List<Object> images;
137156
BizParams bizParams;
138157
ClientInfo clientInfo;
@@ -146,6 +165,16 @@ public Map<String, Object> getParameters() {
146165
upStreamParams.put(CONST_NAME_UP_STREAM_TYPE, upStream.type);
147166
upStreamParams.put(CONST_NAME_UP_STREAM_MODE, upStream.mode);
148167
upStreamParams.put(CONST_NAME_UP_STREAM_AUDIO_FORMAT, upStream.audioFormat);
168+
if (upStream.asrPostProcessing != null){
169+
final var asrPostProcessingParams = getUpstreamAsrPostProcessing();
170+
if (!asrPostProcessingParams.isEmpty()) {
171+
upStreamParams.put(CONST_NAME_ASR_POST_PROCESSING, asrPostProcessingParams);
172+
}
173+
}
174+
upStreamParams.put(CONST_NAME_DOWN_STREAM_SAMPLE_RATE, upStream.sampleRate);
175+
if (upStream.vocabularyId != null) {
176+
upStreamParams.put(CONST_NAME_VOCABULARY_ID, upStream.vocabularyId);
177+
}
149178
if (upStream.passThroughParams != null) {
150179
upStreamParams.putAll(upStream.passThroughParams);
151180
}
@@ -199,17 +228,32 @@ public Map<String, Object> getParameters() {
199228
if (clientInfo.passThroughParams != null) {
200229
clientInfoParams.putAll(clientInfo.passThroughParams);
201230
}
231+
if (clientInfo.sdk != null){
232+
clientInfoParams.put(CONST_NAME_CLIENT_INFO_SDK, clientInfo.sdk);
233+
}
202234
params.put(CONST_NAME_CLIENT_INFO, clientInfoParams);
203235
}
204236

205237
if (bizParams != null) {
206238
val bizParamsParams = new HashMap<String, Object>();
207-
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_PARAMS, bizParams.userDefinedParams);
208-
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_TOKENS, bizParams.userDefinedTokens);
209-
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_TOOL_PROMPTS, bizParams.toolPrompts);
210-
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_QUERY_PARAMS, bizParams.userQueryParams);
211-
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_PROMPT_PARAMS, bizParams.userPromptParams);
212-
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_VIDEOS, bizParams.videos);
239+
if (bizParams.userDefinedParams != null) {
240+
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_PARAMS, bizParams.userDefinedParams);
241+
}
242+
if (bizParams.userDefinedTokens != null) {
243+
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_DEFINED_TOKENS, bizParams.userDefinedTokens);
244+
}
245+
if (bizParams.toolPrompts != null) {
246+
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_TOOL_PROMPTS, bizParams.toolPrompts);
247+
}
248+
if (bizParams.userQueryParams != null) {
249+
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_QUERY_PARAMS, bizParams.userQueryParams);
250+
}
251+
if (bizParams.userPromptParams != null) {
252+
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_USER_PROMPT_PARAMS, bizParams.userPromptParams);
253+
}
254+
if (bizParams.videos != null) {
255+
bizParamsParams.put(CONST_NAME_BIZ_PARAMS_VIDEOS, bizParams.videos);
256+
}
213257
if (bizParams.passThroughParams != null) {
214258
bizParamsParams.putAll(bizParams.passThroughParams);
215259
}
@@ -222,6 +266,22 @@ public Map<String, Object> getParameters() {
222266
return params;
223267
}
224268

269+
private @NotNull HashMap<String, Object> getUpstreamAsrPostProcessing() {
270+
val asrPostProcessingParams = new HashMap<String, Object>();
271+
if (upStream.asrPostProcessing.replaceWords != null) {
272+
val replaceWords = new ArrayList<Map<String, Object>>();
273+
for (val replaceWord : upStream.asrPostProcessing.replaceWords) {
274+
val replaceWordObj= new HashMap<String, Object>();
275+
replaceWordObj.put(CONST_NAME_REPLACE_WORD_SOURCE, replaceWord.source);
276+
replaceWordObj.put(CONST_NAME_REPLACE_WORD_TARGET, replaceWord.target);
277+
replaceWordObj.put(CONST_NAME_REPLACE_WORD_MATCH_MODE, replaceWord.matchMode);
278+
replaceWords.add(replaceWordObj);
279+
}
280+
asrPostProcessingParams.put(CONST_NAME_REPLACE_WORDS, replaceWords);
281+
}
282+
return asrPostProcessingParams;
283+
}
284+
225285
@Override
226286
public Flowable<Object> getStreamingData() {
227287
return null;

0 commit comments

Comments
 (0)