|
71 | 71 | import com.ibm.watson.developer_cloud.speech_to_text.v1.model.UpgradeLanguageModelOptions; |
72 | 72 | import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Word; |
73 | 73 | import com.ibm.watson.developer_cloud.speech_to_text.v1.model.Words; |
| 74 | +import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeCallback; |
| 75 | +import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.SpeechToTextWebSocketListener; |
74 | 76 | import com.ibm.watson.developer_cloud.util.GsonSingleton; |
75 | 77 | import com.ibm.watson.developer_cloud.util.RequestUtils; |
76 | 78 | import com.ibm.watson.developer_cloud.util.ResponseConverterUtils; |
77 | 79 | import com.ibm.watson.developer_cloud.util.Validator; |
| 80 | +import okhttp3.HttpUrl; |
78 | 81 | import okhttp3.MediaType; |
79 | 82 | import okhttp3.MultipartBody; |
| 83 | +import okhttp3.OkHttpClient; |
| 84 | +import okhttp3.Request; |
80 | 85 | import okhttp3.RequestBody; |
| 86 | +import okhttp3.WebSocket; |
81 | 87 |
|
82 | 88 | /** |
83 | 89 | * The IBM® Speech to Text service provides an API that uses IBM's speech-recognition capabilities to produce |
@@ -324,6 +330,55 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz |
324 | 330 | return createServiceCall(builder.build(), ResponseConverterUtils.getObject(SpeechRecognitionResults.class)); |
325 | 331 | } |
326 | 332 |
|
| 333 | + /** |
| 334 | + * Sends audio and returns transcription results for recognition requests over a WebSocket connection. Requests and |
| 335 | + * responses are enabled over a single TCP connection that abstracts much of the complexity of the request to offer |
| 336 | + * efficient implementation, low latency, high throughput, and an asynchronous response. By default, only final |
| 337 | + * results are returned for any request; to enable interim results, set the interimResults parameter to true. |
| 338 | + * |
| 339 | + * The service imposes a data size limit of 100 MB per utterance (per recognition request). You can send multiple |
| 340 | + * utterances over a single WebSocket connection. The service automatically detects the endianness of the incoming |
| 341 | + * audio and, for audio that includes multiple channels, downmixes the audio to one-channel mono during transcoding. |
| 342 | + * (For the audio/l16 format, you can specify the endianness.) |
| 343 | + * |
| 344 | + * @param recognizeOptions the recognize options |
| 345 | + * @param callback the {@link RecognizeCallback} instance where results will be sent |
| 346 | + * @return the {@link WebSocket} |
| 347 | + */ |
| 348 | + public WebSocket recognizeUsingWebSocket(RecognizeOptions recognizeOptions, RecognizeCallback callback) { |
| 349 | + Validator.notNull(recognizeOptions, "recognizeOptions cannot be null"); |
| 350 | + Validator.notNull(recognizeOptions.audio(), "audio cannot be null"); |
| 351 | + Validator.notNull(callback, "callback cannot be null"); |
| 352 | + |
| 353 | + HttpUrl.Builder urlBuilder = HttpUrl.parse(getEndPoint() + "/v1/recognize").newBuilder(); |
| 354 | + |
| 355 | + if (recognizeOptions.model() != null) { |
| 356 | + urlBuilder.addQueryParameter("model", recognizeOptions.model()); |
| 357 | + } |
| 358 | + if (recognizeOptions.customizationId() != null) { |
| 359 | + urlBuilder.addQueryParameter("customization_id", recognizeOptions.customizationId()); |
| 360 | + } |
| 361 | + if (recognizeOptions.acousticCustomizationId() != null) { |
| 362 | + urlBuilder.addQueryParameter("acoustic_customization_id", recognizeOptions.acousticCustomizationId()); |
| 363 | + } |
| 364 | + if (recognizeOptions.baseModelVersion() != null) { |
| 365 | + urlBuilder.addQueryParameter("base_model_version", recognizeOptions.baseModelVersion()); |
| 366 | + } |
| 367 | + if (recognizeOptions.customizationWeight() != null) { |
| 368 | + urlBuilder.addQueryParameter("customization_weight", |
| 369 | + String.valueOf(recognizeOptions.customizationWeight())); |
| 370 | + } |
| 371 | + |
| 372 | + String url = urlBuilder.toString().replace("https://", "wss://"); |
| 373 | + Request.Builder builder = new Request.Builder().url(url); |
| 374 | + |
| 375 | + setAuthentication(builder); |
| 376 | + setDefaultHeaders(builder); |
| 377 | + |
| 378 | + OkHttpClient client = configureHttpClient(); |
| 379 | + return client.newWebSocket(builder.build(), new SpeechToTextWebSocketListener(recognizeOptions, callback)); |
| 380 | + } |
| 381 | + |
327 | 382 | /** |
328 | 383 | * Check a job. |
329 | 384 | * |
|
0 commit comments