Skip to content

Commit f4bf372

Browse files
committed
chore(Speech to Text): Apply manual changes
1 parent e983641 commit f4bf372

File tree

3 files changed

+62
-5
lines changed

3 files changed

+62
-5
lines changed

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,13 +71,19 @@
7171
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.UpgradeLanguageModelOptions;
7272
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word;
7373
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words;
74+
import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeCallback;
75+
import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
7476
import com.ibm.watson.developer_cloud.util.GsonSingleton;
7577
import com.ibm.watson.developer_cloud.util.RequestUtils;
7678
import com.ibm.watson.developer_cloud.util.ResponseConverterUtils;
7779
import com.ibm.watson.developer_cloud.util.Validator;
80+
import okhttp3.HttpUrl;
7881
import okhttp3.MediaType;
7982
import okhttp3.MultipartBody;
83+
import okhttp3.OkHttpClient;
84+
import okhttp3.Request;
8085
import okhttp3.RequestBody;
86+
import okhttp3.WebSocket;
8187

8288
/**
8389
* The IBM® Speech to Text service provides an API that uses IBM's speech-recognition capabilities to produce
@@ -324,6 +330,55 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
324330
return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class));
325331
}
326332

333+
/**
334+
* Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
335+
* responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer
336+
* efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final
337+
* results are returned for any request; to enable interim results, set the {@code interimResults} parameter to true.
338+
*
339+
* The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
340+
* utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
341+
* audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
342+
* (For the audio/l16 format, you can specify the endianness.)
343+
* <p>The socket targets {@code <endpoint>/v1/recognize}, with {@code https://} rewritten to {@code wss://}.
344+
* @param recognizeOptions the recognize options; checked to be non-null and to carry non-null audio
345+
* @param callback the {@link RecognizeCallback} instance where results will be sent; checked to be non-null
346+
* @return the {@link WebSocket} returned by the OkHttp client for this request
347+
*/
348+
public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
349+
Validator.notNull(recognizeOptions, "recognizeOptions cannot be null"); // arguments are checked before any URL or request is built
350+
Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
351+
Validator.notNull(callback, "callback cannot be null");
352+
353+
HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder(); // base recognize URL; query parameters are appended below
354+
355+
if (recognizeOptions.model() != null) { // each option below is added to the query string only when it is set (non-null)
356+
urlBuilder.addQueryParameter("model", recognizeOptions.model());
357+
}
358+
if (recognizeOptions.customizationId() != null) {
359+
urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
360+
}
361+
if (recognizeOptions.acousticCustomizationId() != null) {
362+
urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
363+
}
364+
if (recognizeOptions.baseModelVersion() != null) {
365+
urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
366+
}
367+
if (recognizeOptions.customizationWeight() != null) {
368+
urlBuilder.addQueryParameter("customization_weight",
369+
String.valueOf(recognizeOptions.customizationWeight())); // the weight is not a String, so it is converted for the query string
370+
}
371+
372+
String url = urlBuilder.toString().replace("https://", "wss://"); // swap in the secure WebSocket scheme; a URL without "https://" passes through unchanged
373+
Request.Builder builder = new Request.Builder().url(url);
374+
375+
setAuthentication(builder); // NOTE(review): presumably attaches the service credentials; helper is defined outside this diff
376+
setDefaultHeaders(builder); // NOTE(review): presumably adds the SDK's default headers; helper is defined outside this diff
377+
378+
OkHttpClient client = configureHttpClient(); // NOTE(review): defined elsewhere; confirm whether this builds a new client on every call
379+
return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback)); // the listener is handed the options and the callback
380+
}
381+
327382
/**
328383
* Check a job.
329384
*

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/RecognizeOptions.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.ArrayList;
2020
import java.util.List;
2121

22+
import com.google.gson.annotations.SerializedName;
2223
import com.ibm.watson.developer_cloud.service.model.GenericModel;
2324
import com.ibm.watson.developer_cloud.util.Validator;
2425

@@ -98,7 +99,8 @@ public interface Model {
9899
String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
99100
}
100101

101-
private InputStream audio;
102+
private transient InputStream audio;
103+
@SerializedName("content-type")
102104
private String contentType;
103105
private String model;
104106
private String customizationId;

speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechRecognitionAlternative.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {
2424

2525
private String transcript;
2626
private Double confidence;
27-
private List<String> timestamps;
27+
private List<SpeechTimestamp> timestamps;
2828
@SerializedName("word_confidence")
29-
private List<String> wordConfidence;
29+
private List<SpeechWordConfidence> wordConfidence;
3030

3131
/**
3232
* Gets the transcript.
@@ -60,7 +60,7 @@ public Double getConfidence() {
6060
*
6161
* @return the timestamps
6262
*/
63-
public List<String> getTimestamps() {
63+
public List<SpeechTimestamp> getTimestamps() {
6464
return timestamps;
6565
}
6666

@@ -73,7 +73,7 @@ public List<String> getTimestamps() {
7373
*
7474
* @return the wordConfidence
7575
*/
76-
public List<String> getWordConfidence() {
76+
public List<SpeechWordConfidence> getWordConfidence() {
7777
return wordConfidence;
7878
}
7979
}

0 commit comments

Comments
 (0)