Skip to content

Commit f733552

Browse files
committed
chore(Speech to Text): Apply manual changes
1 parent faefce2 commit f733552

File tree

4 files changed: +167 additions, −21 deletions

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,15 @@
8383
import com.ibm.watson.speech_to_text.v1.model.UpgradeLanguageModelOptions;
8484
import com.ibm.watson.speech_to_text.v1.model.Word;
8585
import com.ibm.watson.speech_to_text.v1.model.Words;
86+
import com.ibm.watson.speech_to_text.v1.websocket.RecognizeCallback;
87+
import com.ibm.watson.speech_to_text.v1.websocket.SpeechToTextWebSocketListener;
88+
import okhttp3.HttpUrl;
89+
import okhttp3.OkHttpClient;
90+
import okhttp3.Request;
91+
import okhttp3.WebSocket;
92+
8693
import java.util.Map;
8794
import java.util.Map.Entry;
88-
import okhttp3.MultipartBody;
89-
import okhttp3.RequestBody;
9095

9196
/**
9297
* The IBM® Speech to Text service provides APIs that use IBM's speech-recognition capabilities to produce
@@ -690,6 +695,54 @@ public ServiceCall<RecognitionJobs> checkJobs() {
690695
return checkJobs(null);
691696
}
692697

698+
/**
699+
* Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and
700+
* responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer
701+
* efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final
702+
* results are returned for any request; to enable interim results, set the interimResults parameter to true.
703+
*
704+
* The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple
705+
* utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming
706+
* audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding.
707+
* (For the audio/l16 format, you can specify the endianness.)
708+
*
709+
* @param recognizeOptions the recognize options
710+
* @param callback the {@link RecognizeCallback} instance where results will be sent
711+
* @return the {@link WebSocket}
712+
*/
713+
public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) {
714+
Validator.notNull(recognizeOptions, "recognizeOptions cannot be null");
715+
Validator.notNull(recognizeOptions.audio(), "audio cannot be null");
716+
Validator.notNull(callback, "callback cannot be null");
717+
718+
HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder();
719+
720+
if (recognizeOptions.model() != null) {
721+
urlBuilder.addQueryParameter("model", recognizeOptions.model());
722+
}
723+
if (recognizeOptions.customizationId() != null) {
724+
urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId());
725+
}
726+
if (recognizeOptions.languageCustomizationId() != null) {
727+
urlBuilder.addQueryParameter("language_customization_id", recognizeOptions.languageCustomizationId());
728+
}
729+
if (recognizeOptions.acousticCustomizationId() != null) {
730+
urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId());
731+
}
732+
if (recognizeOptions.baseModelVersion() != null) {
733+
urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion());
734+
}
735+
736+
String url = urlBuilder.toString().replace("https://", "wss://");
737+
Request.Builder builder = new Request.Builder().url(url);
738+
739+
setAuthentication(builder);
740+
setDefaultHeaders(builder);
741+
742+
OkHttpClient client = configureHttpClient();
743+
return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback));
744+
}
745+
693746
/**
694747
* Check a job.
695748
*
@@ -926,10 +979,6 @@ public ServiceCall<Void> deleteLanguageModel(DeleteLanguageModelOptions deleteLa
926979
* * The service is currently handling another request for the custom model, such as another training request or a
927980
* request to add a corpus or grammar to the model.
928981
* * No training data have been added to the custom model.
929-
* * The custom model contains one or more invalid corpora, grammars, or words (for example, a custom word has an
930-
* invalid sounds-like pronunciation). You can correct the invalid resources or set the `strict` parameter to `false`
931-
* to exclude the invalid resources from the training. The model must contain at least one valid resource for training
932-
* to succeed.
933982
*
934983
* @param trainLanguageModelOptions the {@link TrainLanguageModelOptions} containing the options for the call
935984
* @return a {@link ServiceCall} with a response type of {@link TrainingResponse}
@@ -1109,11 +1158,7 @@ public ServiceCall<Void> addCorpus(AddCorpusOptions addCorpusOptions) {
11091158
if (addCorpusOptions.allowOverwrite() != null) {
11101159
builder.query("allow_overwrite", String.valueOf(addCorpusOptions.allowOverwrite()));
11111160
}
1112-
MultipartBody.Builder multipartBuilder = new MultipartBody.Builder();
1113-
multipartBuilder.setType(MultipartBody.FORM);
1114-
RequestBody corpusFileBody = RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain");
1115-
multipartBuilder.addFormDataPart("corpus_file", "filename", corpusFileBody);
1116-
builder.body(multipartBuilder.build());
1161+
builder.body(RequestUtils.inputStreamBody(addCorpusOptions.corpusFile(), "text/plain"));
11171162
ResponseConverter<Void> responseConverter = ResponseConverterUtils.getVoid();
11181163
return createServiceCall(builder.build(), responseConverter);
11191164
}
@@ -1736,9 +1781,6 @@ public ServiceCall<Void> deleteAcousticModel(DeleteAcousticModelOptions deleteAc
17361781
* * The custom model contains less than 10 minutes or more than 200 hours of audio data.
17371782
* * You passed an incompatible custom language model with the `custom_language_model_id` query parameter. Both custom
17381783
* models must be based on the same version of the same base model.
1739-
* * The custom model contains one or more invalid audio resources. You can correct the invalid audio resources or set
1740-
* the `strict` parameter to `false` to exclude the invalid resources from the training. The model must contain at
1741-
* least one valid resource for training to succeed.
17421784
*
17431785
* @param trainAcousticModelOptions the {@link TrainAcousticModelOptions} containing the options for the call
17441786
* @return a {@link ServiceCall} with a response type of {@link TrainingResponse}

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeOptions.java

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.ArrayList;
2020
import java.util.List;
2121

22+
import com.google.gson.annotations.SerializedName;
2223
import com.ibm.cloud.sdk.core.service.model.GenericModel;
2324
import com.ibm.cloud.sdk.core.util.Validator;
2425

@@ -113,7 +114,7 @@ public interface ContentType {
113114
String AUDIO_WEBM_CODECS_VORBIS = "audio/webm;codecs=vorbis";
114115
}
115116

116-
private InputStream audio;
117+
private transient InputStream audio;
117118
private String model;
118119
private String languageCustomizationId;
119120
private String acousticCustomizationId;
@@ -133,7 +134,11 @@ public interface ContentType {
133134
private String grammarName;
134135
private Boolean redaction;
135136
private Boolean audioMetrics;
137+
@SerializedName("content-type")
136138
private String contentType;
139+
private Boolean interimResults;
140+
private Boolean processingMetrics;
141+
private Float processingMetricsInterval;
137142

138143
/**
139144
* Builder.
@@ -160,6 +165,9 @@ public static class Builder {
160165
private Boolean redaction;
161166
private Boolean audioMetrics;
162167
private String contentType;
168+
private Boolean interimResults;
169+
private Boolean processingMetrics;
170+
private Float processingMetricsInterval;
163171

164172
private Builder(RecognizeOptions recognizeOptions) {
165173
this.audio = recognizeOptions.audio;
@@ -183,6 +191,9 @@ private Builder(RecognizeOptions recognizeOptions) {
183191
this.redaction = recognizeOptions.redaction;
184192
this.audioMetrics = recognizeOptions.audioMetrics;
185193
this.contentType = recognizeOptions.contentType;
194+
this.interimResults = recognizeOptions.interimResults;
195+
this.processingMetrics = recognizeOptions.processingMetrics;
196+
this.processingMetricsInterval = recognizeOptions.processingMetricsInterval;
186197
}
187198

188199
/**
@@ -468,6 +479,45 @@ public Builder audio(File audio) throws FileNotFoundException {
468479
this.audio = new FileInputStream(audio);
469480
return this;
470481
}
482+
483+
/**
484+
* Set the interimResults.
485+
*
486+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
487+
*
488+
* @param interimResults the interimResults
489+
* @return the interimResults
490+
*/
491+
public Builder interimResults(Boolean interimResults) {
492+
this.interimResults = interimResults;
493+
return this;
494+
}
495+
496+
/**
497+
* Set the processingMetrics.
498+
*
499+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
500+
*
501+
* @param processingMetrics the processingMetrics
502+
* @return the processingMetrics
503+
*/
504+
public Builder processingMetrics(Boolean processingMetrics) {
505+
this.processingMetrics = processingMetrics;
506+
return this;
507+
}
508+
509+
/**
510+
* Set the processingMetricsInterval.
511+
*
512+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
513+
*
514+
* @param processingMetricsInterval the processingMetricsInterval
515+
* @return the processingMetricsInterval
516+
*/
517+
public Builder processingMetricsInterval(Float processingMetricsInterval) {
518+
this.processingMetricsInterval = processingMetricsInterval;
519+
return this;
520+
}
471521
}
472522

473523
private RecognizeOptions(Builder builder) {
@@ -493,6 +543,9 @@ private RecognizeOptions(Builder builder) {
493543
redaction = builder.redaction;
494544
audioMetrics = builder.audioMetrics;
495545
contentType = builder.contentType;
546+
interimResults = builder.interimResults;
547+
processingMetrics = builder.processingMetrics;
548+
processingMetricsInterval = builder.processingMetricsInterval;
496549
}
497550

498551
/**
@@ -819,4 +872,55 @@ public Boolean audioMetrics() {
819872
public String contentType() {
820873
return contentType;
821874
}
875+
876+
/**
877+
* Gets the interimResults.
878+
*
879+
* If `true`, the service returns interim results as a stream of `SpeechRecognitionResults` objects. By default,
880+
* the service returns a single `SpeechRecognitionResults` object with final results only.
881+
*
882+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
883+
*
884+
* @return the interimResults
885+
*/
886+
public Boolean interimResults() {
887+
return interimResults;
888+
}
889+
890+
/**
891+
* Gets the processingMetrics.
892+
*
893+
* If `true`, requests processing metrics about the service's transcription of the input audio. The service returns
894+
* processing metrics at the interval specified by the `processing_metrics_interval` parameter. It also returns
895+
* processing metrics for transcription events, for example, for final and interim results. By default, the service
896+
* returns no processing metrics.
897+
*
898+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
899+
*
900+
* @return the processingMetrics
901+
*/
902+
public Boolean processingMetrics() {
903+
return processingMetrics;
904+
}
905+
906+
/**
907+
* Gets the processingMetricsInterval.
908+
*
909+
* Specifies the interval in real wall-clock seconds at which the service is to return processing metrics. The
910+
* parameter is ignored unless the `processing_metrics` parameter is set to `true`.
911+
*
912+
* The parameter accepts a minimum value of 0.1 seconds. The level of precision is not restricted, so you can
913+
* specify values such as 0.25 and 0.125.
914+
*
915+
* The service does not impose a maximum value. If you want to receive processing metrics only for transcription
916+
* events instead of at periodic intervals, set the value to a large number. If the value is larger than the
917+
* duration of the audio, the service returns processing metrics only for transcription events.
918+
*
919+
* NOTE: This parameter only works for the `recognizeUsingWebSocket` method.
920+
*
921+
* @return the processingMetricsInterval
922+
*/
923+
public Float processingMetricsInterval() {
924+
return processingMetricsInterval;
925+
}
822926
}

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/SpeechRecognitionAlternative.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ public class SpeechRecognitionAlternative extends GenericModel {
2424

2525
private String transcript;
2626
private Double confidence;
27-
private List<String> timestamps;
27+
private List<SpeechTimestamp> timestamps;
2828
@SerializedName("word_confidence")
29-
private List<String> wordConfidence;
29+
private List<SpeechWordConfidence> wordConfidence;
3030

3131
/**
3232
* Gets the transcript.
@@ -60,7 +60,7 @@ public Double getConfidence() {
6060
*
6161
* @return the timestamps
6262
*/
63-
public List<String> getTimestamps() {
63+
public List<SpeechTimestamp> getTimestamps() {
6464
return timestamps;
6565
}
6666

@@ -73,7 +73,7 @@ public List<String> getTimestamps() {
7373
*
7474
* @return the wordConfidence
7575
*/
76-
public List<String> getWordConfidence() {
76+
public List<SpeechWordConfidence> getWordConfidence() {
7777
return wordConfidence;
7878
}
7979
}

speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/SpeechRecognitionResult.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public class SpeechRecognitionResult extends GenericModel {
2727
private Boolean finalResults;
2828
private List<SpeechRecognitionAlternative> alternatives;
2929
@SerializedName("keywords_result")
30-
private Map keywordsResult;
30+
private Map<String, List<KeywordResult>> keywordsResult;
3131
@SerializedName("word_alternatives")
3232
private List<WordAlternativeResults> wordAlternatives;
3333

@@ -65,7 +65,7 @@ public List<SpeechRecognitionAlternative> getAlternatives() {
6565
*
6666
* @return the keywordsResult
6767
*/
68-
public Map getKeywordsResult() {
68+
public Map<String, List<KeywordResult>> getKeywordsResult() {
6969
return keywordsResult;
7070
}
7171

0 commit comments

Comments (0)