Commit a162667 — chore(Speech to Text): Apply manual changes
1 parent: 845120a

File tree: 3 files changed (+166 additions, −11 deletions)

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java

Lines changed: 56 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -81,9 +81,15 @@
8181
import com.ibm.watson.speech_to_text.v1.model.UpgradeLanguageModelOptions;
8282
import com.ibm.watson.speech_to_text.v1.model.Word;
8383
import com.ibm.watson.speech_to_text.v1.model.Words;
84+
import com.ibm.watson.speech_to_text.v1.websocket.RecognizeCallback;
85+
import com.ibm.watson.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
86+
import okhttp3.HttpUrl;
87+
import okhttp3.OkHttpClient;
88+
import okhttp3.Request;
89+
import okhttp3.WebSocket;
90+
8491
import java.util.Map;
8592
import java.util.Map.Entry;
86-
import okhttp3.MultipartBody;
8793

8894
/**
8995
* The IBM® Speech to Text service provides APIs that use IBM's speech-recognition capabilities to produce
@@ -354,6 +360,54 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
354360
return createServiceCall(builder.build(), responseConverter);
355361
}
356362

363+
/**
364+
* Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
365+
* responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer
366+
* efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final
367+
* results are returned for any request; to enable interim results, set the interimResults parameter to true.
368+
*
369+
* The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
370+
* utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
371+
* audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
372+
* (For the audio/l16 format, you can specify the endianness.)
373+
*
374+
* @param recognizeOptions the recognize options
375+
* @param callback the {@link RecognizeCallback} instance where results will be sent
376+
* @return the {@link WebSocket}
377+
*/
378+
public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
379+
com.ibm.cloud.sdk.core.util.Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
380+
com.ibm.cloud.sdk.core.util.Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
381+
com.ibm.cloud.sdk.core.util.Validator.notNull(callback, "callback cannot be null");
382+
383+
HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder();
384+
385+
if (recognizeOptions.model() != null) {
386+
urlBuilder.addQueryParameter("model", recognizeOptions.model());
387+
}
388+
if (recognizeOptions.customizationId() != null) {
389+
urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
390+
}
391+
if (recognizeOptions.languageCustomizationId() != null) {
392+
urlBuilder.addQueryParameter("language_customization_id", recognizeOptions.languageCustomizationId());
393+
}
394+
if (recognizeOptions.acousticCustomizationId() != null) {
395+
urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
396+
}
397+
if (recognizeOptions.baseModelVersion() != null) {
398+
urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
399+
}
400+
401+
String url = urlBuilder.toString().replace("https://", "wss://");
402+
Request.Builder builder = new Request.Builder().url(url);
403+
404+
setAuthentication(builder);
405+
setDefaultHeaders(builder);
406+
407+
OkHttpClient client = configureHttpClient();
408+
return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
409+
}
410+
357411
/**
358412
* Register a callback.
359413
*
@@ -1096,11 +1150,7 @@ public ServiceCall<Void> addCorpus(AddCorpusOptions addCorpusOptions) {
10961150
if (addCorpusOptions.allowOverwrite() != null) {
10971151
builder.query("allow_overwrite", String.valueOf(addCorpusOptions.allowOverwrite()));
10981152
}
1099-
MultipartBody.Builder multipartBuilder = new MultipartBody.Builder();
1100-
multipartBuilder.setType(MultipartBody.FORM);
1101-
okhttp3.RequestBody corpusFileBody = RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain");
1102-
multipartBuilder.addFormDataPart("corpus_file", "filename", corpusFileBody);
1103-
builder.body(multipartBuilder.build());
1153+
builder.body(RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain"));
11041154
ResponseConverter<Void> responseConverter = ResponseConverterUtils.getVoid();
11051155
return createServiceCall(builder.build(), responseConverter);
11061156
}

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeOptions.java

Lines changed: 106 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import java.util.ArrayList;
2020
import java.util.List;
2121

22+
import com.google.gson.annotations.SerializedName;
2223
import com.ibm.cloud.sdk.core.service.model.GenericModel;
2324

2425
/**
@@ -132,7 +133,8 @@ public interface Model {
132133
String ZH_CN_NARROWBANDMODEL = "zh-CN_NarrowbandModel";
133134
}
134135

135-
private InputStream audio;
136+
private transient InputStream audio;
137+
@SerializedName("content-type")
136138
private String contentType;
137139
private String model;
138140
private String languageCustomizationId;
@@ -153,6 +155,9 @@ public interface Model {
153155
private String grammarName;
154156
private Boolean redaction;
155157
private Boolean audioMetrics;
158+
private Boolean interimResults;
159+
private Boolean processingMetrics;
160+
private Float processingMetricsInterval;
156161

157162
/**
158163
* Builder.
@@ -179,6 +184,9 @@ public static class Builder {
179184
private String grammarName;
180185
private Boolean redaction;
181186
private Boolean audioMetrics;
187+
private Boolean interimResults;
188+
private Boolean processingMetrics;
189+
private Float processingMetricsInterval;
182190

183191
private Builder(RecognizeOptions recognizeOptions) {
184192
this.audio = recognizeOptions.audio;
@@ -202,6 +210,9 @@ private Builder(RecognizeOptions recognizeOptions) {
202210
this.grammarName = recognizeOptions.grammarName;
203211
this.redaction = recognizeOptions.redaction;
204212
this.audioMetrics = recognizeOptions.audioMetrics;
213+
this.interimResults = recognizeOptions.interimResults;
214+
this.processingMetrics = recognizeOptions.processingMetrics;
215+
this.processingMetricsInterval = recognizeOptions.processingMetricsInterval;
205216
}
206217

207218
/**
@@ -488,6 +499,45 @@ public Builder audio(File audio) throws FileNotFoundException {
488499
this.audio = new FileInputStream(audio);
489500
return this;
490501
}
502+
503+
/**
504+
* Set the interimResults.
505+
*
506+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
507+
*
508+
* @param interimResults the interimResults
509+
* @return the interimResults
510+
*/
511+
public Builder interimResults(Boolean interimResults) {
512+
this.interimResults = interimResults;
513+
return this;
514+
}
515+
516+
/**
517+
* Set the processingMetrics.
518+
*
519+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
520+
*
521+
* @param processingMetrics the processingMetrics
522+
* @return the processingMetrics
523+
*/
524+
public Builder processingMetrics(Boolean processingMetrics) {
525+
this.processingMetrics = processingMetrics;
526+
return this;
527+
}
528+
529+
/**
530+
* Set the processingMetricsInterval.
531+
*
532+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
533+
*
534+
* @param processingMetricsInterval the processingMetricsInterval
535+
* @return the processingMetricsInterval
536+
*/
537+
public Builder processingMetricsInterval(Float processingMetricsInterval) {
538+
this.processingMetricsInterval = processingMetricsInterval;
539+
return this;
540+
}
491541
}
492542

493543
private RecognizeOptions(Builder builder) {
@@ -514,6 +564,9 @@ private RecognizeOptions(Builder builder) {
514564
grammarName = builder.grammarName;
515565
redaction = builder.redaction;
516566
audioMetrics = builder.audioMetrics;
567+
interimResults = builder.interimResults;
568+
processingMetrics = builder.processingMetrics;
569+
processingMetricsInterval = builder.processingMetricsInterval;
517570
}
518571

519572
/**
@@ -840,4 +893,56 @@ public Boolean redaction() {
840893
public Boolean audioMetrics() {
841894
return audioMetrics;
842895
}
896+
897+
/**
898+
* Gets the interimResults.
899+
*
900+
* If `true`, the service returns interim results as a stream of `SpeechRecognitionResults` objects. By default,
901+
* the service returns a single `SpeechRecognitionResults` object with final results only.
902+
*
903+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
904+
*
905+
* @return the interimResults
906+
*/
907+
public Boolean interimResults() {
908+
return interimResults;
909+
}
910+
911+
/**
912+
* Gets the processingMetrics.
913+
*
914+
* If `true`, requests processing metrics about the service's transcription of the input audio. The service returns
915+
* processing metrics at the interval specified by the `processing_metrics_interval` parameter. It also returns
916+
* processing metrics for transcription events, for example, for final and interim results. By default, the service
917+
* returns no processing metrics.
918+
*
919+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
920+
*
921+
* @return the processingMetrics
922+
*/
923+
public Boolean processingMetrics() {
924+
return processingMetrics;
925+
}
926+
927+
/**
928+
* Gets the processingMetricsInterval.
929+
*
930+
* Specifies the interval in real wall-clock seconds at which the service is to return processing metrics. The
931+
* parameter is ignored unless the `processing_metrics` parameter is set to `true`.
932+
*
933+
* The parameter accepts a minimum value of 0.1 seconds. The level of precision is not restricted, so you can
934+
* specify values such as 0.25 and 0.125.
935+
*
936+
* The service does not impose a maximum value. If you want to receive processing metrics only for transcription
937+
* events instead of at periodic intervals, set the value to a large number. If the value is larger than the
938+
* duration of the audio, the service returns processing metrics only for transcription events.
939+
*
940+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
941+
*
942+
* @return the processingMetricsInterval
943+
*/
944+
public Float processingMetricsInterval() {
945+
return processingMetricsInterval;
946+
}
947+
843948
}

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/SpeechRecognitionAlternative.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {
2424

2525
private String transcript;
2626
private Double confidence;
27-
private List<String> timestamps;
27+
private List<SpeechTimestamp> timestamps;
2828
@SerializedName("word_confidence")
29-
private List<String> wordConfidence;
29+
private List<SpeechWordConfidence> wordConfidence;
3030

3131
/**
3232
* Gets the transcript.
@@ -60,7 +60,7 @@ public Double getConfidence() {
6060
*
6161
* @return the timestamps
6262
*/
63-
public List<String> getTimestamps() {
63+
public List<SpeechTimestamp> getTimestamps() {
6464
return timestamps;
6565
}
6666

@@ -73,7 +73,7 @@ public List<String> getTimestamps() {
7373
*
7474
* @return the wordConfidence
7575
*/
76-
public List<String> getWordConfidence() {
76+
public List<SpeechWordConfidence> getWordConfidence() {
7777
return wordConfidence;
7878
}
7979
}

Comments (0)