chore(Speech to Text): Apply manual changes

lpatino10 · lpatino10 · commit f4bf3729cac4 · 2018-08-01T21:33:48.000+03:00
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java
@@ -71,13 +71,19 @@
 import com.ibm.watson.developer_cloud.speech_to_text.v1.model.UpgradeLanguageModelOptions;
 import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word;
 import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeCallback;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
 import com.ibm.watson.developer_cloud.util.GsonSingleton;
 import com.ibm.watson.developer_cloud.util.RequestUtils;
 import com.ibm.watson.developer_cloud.util.ResponseConverterUtils;
 import com.ibm.watson.developer_cloud.util.Validator;
+import okhttp3.HttpUrl;
 import okhttp3.MediaType;
 import okhttp3.MultipartBody;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
 import okhttp3.RequestBody;
+import okhttp3.WebSocket;
 
 /**
  * The IBM&reg; Speech to Text service provides an API that uses IBM's speech-recognition capabilities to produce
@@ -324,6 +330,55 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
     return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class));
   }
 
+  /**
+   * Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
+   * responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer
+   * efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final
+   * results are returned for any request; to enable interim results, set the interimResults parameter to true.
+   * <p>
+   * The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
+   * utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
+   * audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
+   * (For the audio/l16 format, you can specify the endianness.)
+   *
+   * @param recognizeOptions the recognize options; must be non-null and must supply non-null audio
+   * @param callback the non-null {@link RecognizeCallback} instance where results will be sent
+   * @return the {@link WebSocket} created for this recognition request
+   */
+  public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
+    Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
+    Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
+    Validator.notNull(callback, "callback cannot be null");
+
+    HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder();
+    // Only the options below go on the URL as query parameters; each one is skipped when it is null.
+    if (recognizeOptions.model() != null) {
+      urlBuilder.addQueryParameter("model", recognizeOptions.model());
+    }
+    if (recognizeOptions.customizationId() != null) {
+      urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
+    }
+    if (recognizeOptions.acousticCustomizationId() != null) {
+      urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
+    }
+    if (recognizeOptions.baseModelVersion() != null) {
+      urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
+    }
+    if (recognizeOptions.customizationWeight() != null) {
+      urlBuilder.addQueryParameter("customization_weight",
+          String.valueOf(recognizeOptions.customizationWeight()));
+    }
+    // Use wss:// in place of https://. NOTE(review): an http:// endpoint is left unchanged by this replace.
+    String url = urlBuilder.toString().replace("https://", "wss://");
+    Request.Builder builder = new Request.Builder().url(url);
+
+    setAuthentication(builder);
+    setDefaultHeaders(builder);
+    // Open the socket; the listener is handed both the options and the callback.
+    OkHttpClient client = configureHttpClient();
+    return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
+  }
+
   /**
    * Check a job.
    *
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/RecognizeOptions.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/RecognizeOptions.java
@@ -19,6 +19,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import com.google.gson.annotations.SerializedName;
 import com.ibm.watson.developer_cloud.service.model.GenericModel;
 import com.ibm.watson.developer_cloud.util.Validator;
 
@@ -98,7 +99,8 @@ public interface Model {
     String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
   }
 
-  private InputStream audio;
+  private transient InputStream audio;
+  @SerializedName("content-type")
   private String contentType;
   private String model;
   private String customizationId;
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechRecognitionAlternative.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechRecognitionAlternative.java
@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {
 
   private String transcript;
   private Double confidence;
-  private List<String> timestamps;
+  private List<SpeechTimestamp> timestamps;
   @SerializedName("word_confidence")
-  private List<String> wordConfidence;
+  private List<SpeechWordConfidence> wordConfidence;
 
   /**
    * Gets the transcript.
@@ -60,7 +60,7 @@ public Double getConfidence() {
    *
    * @return the timestamps
    */
-  public List<String> getTimestamps() {
+  public List<SpeechTimestamp> getTimestamps() {
     return timestamps;
   }
 
@@ -73,7 +73,7 @@ public List<String> getTimestamps() {
    *
    * @return the wordConfidence
    */
-  public List<String> getWordConfidence() {
+  public List<SpeechWordConfidence> getWordConfidence() {
     return wordConfidence;
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {`
`24`	`24`
`25`	`25`	`private String transcript;`
`26`	`26`	`private Double confidence;`
`27`		`- private List<String> timestamps;`
	`27`	`+ private List<SpeechTimestamp> timestamps;`
`28`	`28`	`@SerializedName("word_confidence")`
`29`		`- private List<String> wordConfidence;`
	`29`	`+ private List<SpeechWordConfidence> wordConfidence;`
`30`	`30`
`31`	`31`	`/**`
`32`	`32`	`* Gets the transcript.`
`@@ -60,7 +60,7 @@ public Double getConfidence() {`
`60`	`60`	`*`
`61`	`61`	`* @return the timestamps`
`62`	`62`	`*/`
`63`		`- public List<String> getTimestamps() {`
	`63`	`+ public List<SpeechTimestamp> getTimestamps() {`
`64`	`64`	`return timestamps;`
`65`	`65`	`}`
`66`	`66`
`@@ -73,7 +73,7 @@ public List<String> getTimestamps() {`
`73`	`73`	`*`
`74`	`74`	`* @return the wordConfidence`
`75`	`75`	`*/`
`76`		`- public List<String> getWordConfidence() {`
	`76`	`+ public List<SpeechWordConfidence> getWordConfidence() {`
`77`	`77`	`return wordConfidence;`
`78`	`78`	`}`
`79`	`79`	`}`