Skip to content

Commit 3dbdff6

Browse files
committed
chore(Speech to Text): Apply manual changes
1 parent d7bded0 commit 3dbdff6

File tree

5 files changed

+124
-16
lines changed

5 files changed

+124
-16
lines changed

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java

Lines changed: 65 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -70,12 +70,16 @@
7070
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.UpgradeLanguageModelOptions;
7171
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word;
7272
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words;
73+
import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeCallback;
74+
import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
7375
import com.ibm.watson.developer_cloud.util.GsonSingleton;
7476
import com.ibm.watson.developer_cloud.util.RequestUtils;
7577
import com.ibm.watson.developer_cloud.util.ResponseConverterUtils;
7678
import com.ibm.watson.developer_cloud.util.Validator;
77-
import okhttp3.MultipartBody;
78-
import okhttp3.RequestBody;
79+
import okhttp3.HttpUrl;
80+
import okhttp3.OkHttpClient;
81+
import okhttp3.Request;
82+
import okhttp3.WebSocket;
7983

8084
/**
8185
* The IBM® Speech to Text service provides APIs that use IBM's speech-recognition capabilities to produce
@@ -264,7 +268,9 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
264268
Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
265269
String[] pathSegments = { "v1/recognize" };
266270
RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getEndPoint(), pathSegments));
267-
builder.header("Content-Type", recognizeOptions.contentType());
271+
if (recognizeOptions.contentType() != null) {
272+
builder.header("Content-Type", recognizeOptions.contentType());
273+
}
268274
if (recognizeOptions.model() != null) {
269275
builder.query("model", recognizeOptions.model());
270276
}
@@ -317,6 +323,58 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
317323
return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class));
318324
}
319325

326+
/**
327+
* Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
328+
* responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer
329+
* efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final
330+
* results are returned for any request; to enable interim results, set the interimResults parameter to true.
331+
*
332+
* The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
333+
* utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
334+
* audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
335+
* (For the audio/l16 format, you can specify the endianness.)
336+
*
337+
* @param recognizeOptions the recognize options
338+
* @param callback the {@link RecognizeCallback} instance where results will be sent
339+
* @return the {@link WebSocket}
340+
*/
341+
public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
342+
Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
343+
Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
344+
Validator.notNull(callback, "callback cannot be null");
345+
346+
HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder();
347+
348+
if (recognizeOptions.model() != null) {
349+
urlBuilder.addQueryParameter("model", recognizeOptions.model());
350+
}
351+
if (recognizeOptions.customizationId() != null) {
352+
urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
353+
}
354+
if (recognizeOptions.languageCustomizationId() != null) {
355+
urlBuilder.addQueryParameter("language_customization_id", recognizeOptions.languageCustomizationId());
356+
}
357+
if (recognizeOptions.acousticCustomizationId() != null) {
358+
urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
359+
}
360+
if (recognizeOptions.baseModelVersion() != null) {
361+
urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
362+
}
363+
if (recognizeOptions.customizationWeight() != null) {
364+
urlBuilder.addQueryParameter("customization_weight",
365+
String.valueOf(recognizeOptions.customizationWeight()));
366+
}
367+
368+
String url = urlBuilder.toString().replace("https://", "wss://");
369+
Request.Builder builder = new Request.Builder().url(url);
370+
371+
setAuthentication(builder);
372+
setDefaultHeaders(builder);
373+
374+
OkHttpClient client = configureHttpClient();
375+
return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
376+
}
377+
320378
/**
321379
* Check a job.
322380
*
@@ -468,7 +526,9 @@ public ServiceCall<RecognitionJob> createJob(CreateJobOptions createJobOptions)
468526
Validator.notNull(createJobOptions, "createJobOptions cannot be null");
469527
String[] pathSegments = { "v1/recognitions" };
470528
RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getEndPoint(), pathSegments));
471-
builder.header("Content-Type", createJobOptions.contentType());
529+
if (createJobOptions.contentType() != null) {
530+
builder.header("Content-Type", createJobOptions.contentType());
531+
}
472532
if (createJobOptions.model() != null) {
473533
builder.query("model", createJobOptions.model());
474534
}
@@ -884,11 +944,7 @@ public ServiceCall<Void> addCorpus(AddCorpusOptions addCorpusOptions) {
884944
if (addCorpusOptions.allowOverwrite() != null) {
885945
builder.query("allow_overwrite", String.valueOf(addCorpusOptions.allowOverwrite()));
886946
}
887-
MultipartBody.Builder multipartBuilder = new MultipartBody.Builder();
888-
multipartBuilder.setType(MultipartBody.FORM);
889-
RequestBody corpusFileBody = RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain");
890-
multipartBuilder.addFormDataPart("corpus_file", addCorpusOptions.corpusFilename(), corpusFileBody);
891-
builder.body(multipartBuilder.build());
947+
builder.body(RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain"));
892948
return createServiceCall(builder.build(), ResponseConverterUtils.getVoid());
893949
}
894950

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/AddAudioOptions.java

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,19 @@ private Builder(AddAudioOptions addAudioOptions) {
129129
public Builder() {
130130
}
131131

132+
/**
133+
* Instantiates a new builder.
134+
*
135+
* @param customizationId the customizationId
136+
* @param audioName the audioName
137+
* @deprecated audioResource and contentType are now required, so this constructor will be removed. Please use
138+
* the constructor with 4 parameters.
139+
*/
140+
public Builder(String customizationId, String audioName) {
141+
this.customizationId = customizationId;
142+
this.audioName = audioName;
143+
}
144+
132145
/**
133146
* Instantiates a new builder with required properties.
134147
*

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/CreateJobOptions.java

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -483,6 +483,8 @@ public Builder speakerLabels(Boolean speakerLabels) {
483483
*
484484
* @param customizationId the customizationId
485485
* @return the CreateJobOptions builder
486+
* @deprecated Use the `languageCustomizationId` setter to specify the customization ID (GUID) of a custom
487+
* language model that is to be used with the recognition request. Do not specify both parameters with a request.
486488
*/
487489
public Builder customizationId(String customizationId) {
488490
this.customizationId = customizationId;
@@ -505,7 +507,6 @@ public Builder audio(File audio) throws FileNotFoundException {
505507

506508
private CreateJobOptions(Builder builder) {
507509
Validator.notNull(builder.audio, "audio cannot be null");
508-
Validator.notNull(builder.contentType, "contentType cannot be null");
509510
audio = builder.audio;
510511
contentType = builder.contentType;
511512
model = builder.model;
@@ -859,6 +860,8 @@ public Boolean speakerLabels() {
859860
* language model that is to be used with the recognition request. Do not specify both parameters with a request.
860861
*
861862
* @return the customizationId
863+
* @deprecated Use the `languageCustomizationId` getter to get the customization ID (GUID) of a custom
864+
* language model that is to be used with the recognition request.
862865
*/
863866
public String customizationId() {
864867
return customizationId;

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/RecognizeOptions.java

Lines changed: 38 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import java.util.ArrayList;
2020
import java.util.List;
2121

22+
import com.google.gson.annotations.SerializedName;
2223
import com.ibm.watson.developer_cloud.service.model.GenericModel;
2324
import com.ibm.watson.developer_cloud.util.Validator;
2425

@@ -101,7 +102,8 @@ public interface Model {
101102
String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
102103
}
103104

104-
private InputStream audio;
105+
private transient InputStream audio;
106+
@SerializedName("content-type")
105107
private String contentType;
106108
private String model;
107109
private String languageCustomizationId;
@@ -119,6 +121,7 @@ public interface Model {
119121
private Boolean smartFormatting;
120122
private Boolean speakerLabels;
121123
private String customizationId;
124+
private Boolean interimResults;
122125

123126
/**
124127
* Builder.
@@ -142,6 +145,7 @@ public static class Builder {
142145
private Boolean smartFormatting;
143146
private Boolean speakerLabels;
144147
private String customizationId;
148+
private Boolean interimResults;
145149

146150
private Builder(RecognizeOptions recognizeOptions) {
147151
audio = recognizeOptions.audio;
@@ -162,6 +166,7 @@ private Builder(RecognizeOptions recognizeOptions) {
162166
smartFormatting = recognizeOptions.smartFormatting;
163167
speakerLabels = recognizeOptions.speakerLabels;
164168
customizationId = recognizeOptions.customizationId;
169+
interimResults = recognizeOptions.interimResults;
165170
}
166171

167172
/**
@@ -205,6 +210,19 @@ public Builder addKeyword(String keyword) {
205210
return this;
206211
}
207212

213+
/**
214+
* Set the interimResults.
215+
*
216+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
217+
*
218+
* @param interimResults the interimResults
219+
* @return the RecognizeOptions builder
220+
*/
221+
public Builder interimResults(Boolean interimResults) {
222+
this.interimResults = interimResults;
223+
return this;
224+
}
225+
208226
/**
209227
* Set the audio.
210228
*
@@ -398,6 +416,8 @@ public Builder speakerLabels(Boolean speakerLabels) {
398416
*
399417
* @param customizationId the customizationId
400418
* @return the RecognizeOptions builder
419+
* @deprecated Use the `languageCustomizationId` setter to specify the customization ID (GUID) of a custom
420+
* language model that is to be used with the recognition request. Do not specify both parameters with a request.
401421
*/
402422
public Builder customizationId(String customizationId) {
403423
this.customizationId = customizationId;
@@ -420,7 +440,6 @@ public Builder audio(File audio) throws FileNotFoundException {
420440

421441
private RecognizeOptions(Builder builder) {
422442
Validator.notNull(builder.audio, "audio cannot be null");
423-
Validator.notNull(builder.contentType, "contentType cannot be null");
424443
audio = builder.audio;
425444
contentType = builder.contentType;
426445
model = builder.model;
@@ -439,6 +458,7 @@ private RecognizeOptions(Builder builder) {
439458
smartFormatting = builder.smartFormatting;
440459
speakerLabels = builder.speakerLabels;
441460
customizationId = builder.customizationId;
461+
interimResults = builder.interimResults;
442462
}
443463

444464
/**
@@ -702,8 +722,24 @@ public Boolean speakerLabels() {
702722
* language model that is to be used with the recognition request. Do not specify both parameters with a request.
703723
*
704724
* @return the customizationId
725+
* @deprecated Use the `languageCustomizationId` getter to get the customization ID (GUID) of a custom
726+
* language model that is to be used with the recognition request.
705727
*/
706728
public String customizationId() {
707729
return customizationId;
708730
}
731+
732+
/**
733+
* Gets the interimResults.
734+
*
735+
* If `true`, the service returns interim results as a stream of `SpeechRecognitionResults` objects. By default,
736+
* the service returns a single `SpeechRecognitionResults` object with final results only.
737+
*
738+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
739+
*
740+
* @return the interimResults
741+
*/
742+
public Boolean interimResults() {
743+
return interimResults;
744+
}
709745
}

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechRecognitionAlternative.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {
2424

2525
private String transcript;
2626
private Double confidence;
27-
private List<String> timestamps;
27+
private List<SpeechTimestamp> timestamps;
2828
@SerializedName("word_confidence")
29-
private List<String> wordConfidence;
29+
private List<SpeechWordConfidence> wordConfidence;
3030

3131
/**
3232
* Gets the transcript.
@@ -60,7 +60,7 @@ public Double getConfidence() {
6060
*
6161
* @return the timestamps
6262
*/
63-
public List<String> getTimestamps() {
63+
public List<SpeechTimestamp> getTimestamps() {
6464
return timestamps;
6565
}
6666

@@ -73,7 +73,7 @@ public List<String> getTimestamps() {
7373
*
7474
* @return the wordConfidence
7575
*/
76-
public List<String> getWordConfidence() {
76+
public List<SpeechWordConfidence> getWordConfidence() {
7777
return wordConfidence;
7878
}
7979
}

0 commit comments

Comments
 (0)