chore(Speech to Text): Apply manual changes

lpatino10 · lpatino10 · commit 3dbdff62c39d · 2018-11-19T14:51:32.000-05:00
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java
@@ -70,12 +70,16 @@
 import com.ibm.watson.developer_cloud.speech_to_text.v1.model.UpgradeLanguageModelOptions;
 import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word;
 import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeCallback;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
 import com.ibm.watson.developer_cloud.util.GsonSingleton;
 import com.ibm.watson.developer_cloud.util.RequestUtils;
 import com.ibm.watson.developer_cloud.util.ResponseConverterUtils;
 import com.ibm.watson.developer_cloud.util.Validator;
-import okhttp3.MultipartBody;
-import okhttp3.RequestBody;
+import okhttp3.HttpUrl;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.WebSocket;
 
 /**
  * The IBM&reg; Speech to Text service provides APIs that use IBM's speech-recognition capabilities to produce
@@ -264,7 +268,9 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
     Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
     String[] pathSegments = { "v1/recognize" };
     RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getEndPoint(), pathSegments));
-    builder.header("Content-Type", recognizeOptions.contentType());
+    if (recognizeOptions.contentType() != null) {
+      builder.header("Content-Type", recognizeOptions.contentType());
+    }
     if (recognizeOptions.model() != null) {
       builder.query("model", recognizeOptions.model());
     }
@@ -317,6 +323,58 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
     return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class));
   }
 
+  /**
+   * Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
+   * responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer
+   * efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final
+   * results are returned for any request; to enable interim results, set the interimResults parameter to true.
+   *
+   * The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
+   * utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
+   * audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
+   * (For the audio/l16 format, you can specify the endianness.)
+   *
+   * @param recognizeOptions the recognize options; must not be null and must carry non-null audio
+   * @param callback the {@link RecognizeCallback} instance where results will be sent; must not be null
+   * @return the {@link WebSocket} created for this recognition request
+   */
+  public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
+    Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
+    Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
+    Validator.notNull(callback, "callback cannot be null");
+
+    HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder();
+    // Forward only the options that were explicitly set as query parameters.
+    if (recognizeOptions.model() != null) {
+      urlBuilder.addQueryParameter("model", recognizeOptions.model());
+    }
+    if (recognizeOptions.customizationId() != null) {
+      urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
+    }
+    if (recognizeOptions.languageCustomizationId() != null) {
+      urlBuilder.addQueryParameter("language_customization_id", recognizeOptions.languageCustomizationId());
+    }
+    if (recognizeOptions.acousticCustomizationId() != null) {
+      urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
+    }
+    if (recognizeOptions.baseModelVersion() != null) {
+      urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
+    }
+    if (recognizeOptions.customizationWeight() != null) {
+      urlBuilder.addQueryParameter("customization_weight",
+          String.valueOf(recognizeOptions.customizationWeight()));
+    }
+    // Rewrite https:// to wss:// in the URL string; any other scheme is passed through unchanged.
+    String url = urlBuilder.toString().replace("https://", "wss://");
+    Request.Builder builder = new Request.Builder().url(url);
+
+    setAuthentication(builder);
+    setDefaultHeaders(builder);
+
+    OkHttpClient client = configureHttpClient();
+    return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
+  }
+
   /**
    * Check a job.
    *
@@ -468,7 +526,9 @@ public ServiceCall<RecognitionJob> createJob(CreateJobOptions createJobOptions)
     Validator.notNull(createJobOptions, "createJobOptions cannot be null");
     String[] pathSegments = { "v1/recognitions" };
     RequestBuilder builder = RequestBuilder.post(RequestBuilder.constructHttpUrl(getEndPoint(), pathSegments));
-    builder.header("Content-Type", createJobOptions.contentType());
+    if (createJobOptions.contentType() != null) {
+      builder.header("Content-Type", createJobOptions.contentType());
+    }
     if (createJobOptions.model() != null) {
       builder.query("model", createJobOptions.model());
     }
@@ -884,11 +944,7 @@ public ServiceCall<Void> addCorpus(AddCorpusOptions addCorpusOptions) {
     if (addCorpusOptions.allowOverwrite() != null) {
       builder.query("allow_overwrite", String.valueOf(addCorpusOptions.allowOverwrite()));
     }
-    MultipartBody.Builder multipartBuilder = new MultipartBody.Builder();
-    multipartBuilder.setType(MultipartBody.FORM);
-    RequestBody corpusFileBody = RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain");
-    multipartBuilder.addFormDataPart("corpus_file", addCorpusOptions.corpusFilename(), corpusFileBody);
-    builder.body(multipartBuilder.build());
+    builder.body(RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain"));
     return createServiceCall(builder.build(), ResponseConverterUtils.getVoid());
   }
 
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/AddAudioOptions.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/AddAudioOptions.java
@@ -129,6 +129,19 @@ private Builder(AddAudioOptions addAudioOptions) {
     public Builder() {
     }
 
+    /**
+     * Instantiates a new builder with only the customization ID and audio name set.
+     *
+     * @param customizationId the customizationId
+     * @param audioName the audioName
+     * @deprecated audioResource and contentType are now required, so this constructor will be removed. Please use
+     * the constructor with 4 parameters.
+     */
+    public Builder(String customizationId, String audioName) {
+      this.customizationId = customizationId;
+      this.audioName = audioName;
+    }
+
     /**
      * Instantiates a new builder with required properties.
      *
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/CreateJobOptions.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/CreateJobOptions.java
@@ -483,6 +483,8 @@ public Builder speakerLabels(Boolean speakerLabels) {
      *
      * @param customizationId the customizationId
      * @return the CreateJobOptions builder
+     * @deprecated Use the `languageCustomizationId` setter to specify the customization ID (GUID) of a custom
+     * language model that is to be used with the recognition request. Do not specify both parameters with a request.
      */
     public Builder customizationId(String customizationId) {
       this.customizationId = customizationId;
@@ -505,7 +507,6 @@ public Builder audio(File audio) throws FileNotFoundException {
 
   private CreateJobOptions(Builder builder) {
     Validator.notNull(builder.audio, "audio cannot be null");
-    Validator.notNull(builder.contentType, "contentType cannot be null");
     audio = builder.audio;
     contentType = builder.contentType;
     model = builder.model;
@@ -859,6 +860,8 @@ public Boolean speakerLabels() {
    * language model that is to be used with the recognition request. Do not specify both parameters with a request.
    *
    * @return the customizationId
+   * @deprecated Use the `languageCustomizationId` getter to get the customization ID (GUID) of a custom
+   * language model that is to be used with the recognition request.
    */
   public String customizationId() {
     return customizationId;
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/RecognizeOptions.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/RecognizeOptions.java
@@ -19,6 +19,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import com.google.gson.annotations.SerializedName;
 import com.ibm.watson.developer_cloud.service.model.GenericModel;
 import com.ibm.watson.developer_cloud.util.Validator;
 
@@ -101,7 +102,8 @@ public interface Model {
     String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
   }
 
-  private InputStream audio;
+  private transient InputStream audio;
+  @SerializedName("content-type")
   private String contentType;
   private String model;
   private String languageCustomizationId;
@@ -119,6 +121,7 @@ public interface Model {
   private Boolean smartFormatting;
   private Boolean speakerLabels;
   private String customizationId;
+  private Boolean interimResults;
 
   /**
    * Builder.
@@ -142,6 +145,7 @@ public static class Builder {
     private Boolean smartFormatting;
     private Boolean speakerLabels;
     private String customizationId;
+    private Boolean interimResults;
 
     private Builder(RecognizeOptions recognizeOptions) {
       audio = recognizeOptions.audio;
@@ -162,6 +166,7 @@ private Builder(RecognizeOptions recognizeOptions) {
       smartFormatting = recognizeOptions.smartFormatting;
       speakerLabels = recognizeOptions.speakerLabels;
       customizationId = recognizeOptions.customizationId;
+      interimResults = recognizeOptions.interimResults;
     }
 
     /**
@@ -205,6 +210,19 @@ public Builder addKeyword(String keyword) {
       return this;
     }
 
+    /**
+     * Set the interimResults.
+     *
+     * NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
+     *
+     * @param interimResults the interimResults
+     * @return the RecognizeOptions builder
+     */
+    public Builder interimResults(Boolean interimResults) {
+      this.interimResults = interimResults;
+      return this;
+    }
+
     /**
      * Set the audio.
      *
@@ -398,6 +416,8 @@ public Builder speakerLabels(Boolean speakerLabels) {
      *
      * @param customizationId the customizationId
      * @return the RecognizeOptions builder
+     * @deprecated Use the `languageCustomizationId` setter to specify the customization ID (GUID) of a custom
+     * language model that is to be used with the recognition request. Do not specify both parameters with a request.
      */
     public Builder customizationId(String customizationId) {
       this.customizationId = customizationId;
@@ -420,7 +440,6 @@ public Builder audio(File audio) throws FileNotFoundException {
 
   private RecognizeOptions(Builder builder) {
     Validator.notNull(builder.audio, "audio cannot be null");
-    Validator.notNull(builder.contentType, "contentType cannot be null");
     audio = builder.audio;
     contentType = builder.contentType;
     model = builder.model;
@@ -439,6 +458,7 @@ private RecognizeOptions(Builder builder) {
     smartFormatting = builder.smartFormatting;
     speakerLabels = builder.speakerLabels;
     customizationId = builder.customizationId;
+    interimResults = builder.interimResults;
   }
 
   /**
@@ -702,8 +722,24 @@ public Boolean speakerLabels() {
    * language model that is to be used with the recognition request. Do not specify both parameters with a request.
    *
    * @return the customizationId
+   * @deprecated Use the `languageCustomizationId` getter to get the customization ID (GUID) of a custom
+   * language model that is to be used with the recognition request.
    */
   public String customizationId() {
     return customizationId;
   }
+
+  /**
+   * Gets the interimResults.
+   *
+   * If `true`, the service returns interim results as a stream of `SpeechRecognitionResults` objects. By default,
+   * the service returns a single `SpeechRecognitionResults` object with final results only.
+   *
+   * NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
+   *
+   * @return the interimResults, or null if it was never set on the builder
+   */
+  public Boolean interimResults() {
+    return interimResults;
+  }
 }
diff --git a/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechRecognitionAlternative.java b/speech-to-text/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechRecognitionAlternative.java
@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {
 
   private String transcript;
   private Double confidence;
-  private List<String> timestamps;
+  private List<SpeechTimestamp> timestamps;
   @SerializedName("word_confidence")
-  private List<String> wordConfidence;
+  private List<SpeechWordConfidence> wordConfidence;
 
   /**
    * Gets the transcript.
@@ -60,7 +60,7 @@ public Double getConfidence() {
    *
    * @return the timestamps
    */
-  public List<String> getTimestamps() {
+  public List<SpeechTimestamp> getTimestamps() {
     return timestamps;
   }
 
@@ -73,7 +73,7 @@ public List<String> getTimestamps() {
    *
    * @return the wordConfidence
    */
-  public List<String> getWordConfidence() {
+  public List<SpeechWordConfidence> getWordConfidence() {
     return wordConfidence;
   }
 }

Original file line number	Diff line number	Diff line change
`@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {`
`24`	`24`
`25`	`25`	`private String transcript;`
`26`	`26`	`private Double confidence;`
`27`		`- private List<String> timestamps;`
	`27`	`+ private List<SpeechTimestamp> timestamps;`
`28`	`28`	`@SerializedName("word_confidence")`
`29`		`- private List<String> wordConfidence;`
	`29`	`+ private List<SpeechWordConfidence> wordConfidence;`
`30`	`30`
`31`	`31`	`/**`
`32`	`32`	`* Gets the transcript.`
`@@ -60,7 +60,7 @@ public Double getConfidence() {`
`60`	`60`	`*`
`61`	`61`	`* @return the timestamps`
`62`	`62`	`*/`
`63`		`- public List<String> getTimestamps() {`
	`63`	`+ public List<SpeechTimestamp> getTimestamps() {`
`64`	`64`	`return timestamps;`
`65`	`65`	`}`
`66`	`66`
`@@ -73,7 +73,7 @@ public List<String> getTimestamps() {`
`73`	`73`	`*`
`74`	`74`	`* @return the wordConfidence`
`75`	`75`	`*/`
`76`		`- public List<String> getWordConfidence() {`
	`76`	`+ public List<SpeechWordConfidence> getWordConfidence() {`
`77`	`77`	`return wordConfidence;`
`78`	`78`	`}`
`79`	`79`	`}`