 # frozen_string_literal: true

-# (C) Copyright IBM Corp. 2019.
+# (C) Copyright IBM Corp. 2020.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -135,7 +135,7 @@ def get_model(model_id:)
 #########################

 ##
-# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil)
+# @!method recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
 # Recognize audio.
 # Sends audio and returns transcription results for a recognition request. You can
 # pass a maximum of 100 MB and a minimum of 100 bytes of audio with a request. The
@@ -358,8 +358,38 @@ def get_model(model_id:)
 # @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
 # input audio. The service returns audio metrics with the final transcription
 # results. By default, the service returns no audio metrics.
+#
+# See [Audio
+# metrics](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
+# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
+# splits a transcript into multiple final results. If the service detects pauses or
+# extended silence before it reaches the end of the audio stream, its response can
+# include multiple final results. Silence indicates a point at which the speaker
+# pauses between spoken words or phrases.
+#
+# Specify a value for the pause interval in the range of 0.0 to 120.0.
+# * A value greater than 0 specifies the interval that the service is to use for
+# speech recognition.
+# * A value of 0 indicates that the service is to use the default interval. It is
+# equivalent to omitting the parameter.
+#
+# The default pause interval for most languages is 0.8 seconds; the default for
+# Chinese is 0.6 seconds.
+#
+# See [End of phrase silence
+# time](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#silence_time).
+# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
+# based on semantic features of the input, for example, at the conclusion of
+# meaningful phrases such as sentences. The service bases its understanding of
+# semantic features on the base language model that you use with a request. Custom
+# language models and grammars can also influence how and where the service splits a
+# transcript. By default, the service splits transcripts based solely on the pause
+# interval.
+#
+# See [Split transcript at phrase
+# end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
 # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil)
+def recognize(audio:, content_type: nil, model: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
   raise ArgumentError.new("audio must be provided") if audio.nil?

   headers = {
@@ -388,7 +418,9 @@ def recognize(audio:, content_type: nil, model: nil, language_customization_id:
388418 "customization_id" => customization_id ,
389419 "grammar_name" => grammar_name ,
390420 "redaction" => redaction ,
391- "audio_metrics" => audio_metrics
421+ "audio_metrics" => audio_metrics ,
422+ "end_of_phrase_silence_time" => end_of_phrase_silence_time ,
423+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
392424 }
393425
394426 data = audio
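
For orientation, here is a minimal usage sketch of the two new options on `recognize`, assuming the gem's usual IAM setup; the API key, service URL, and audio file name are placeholders, not part of this change:

```ruby
require "ibm_watson/authenticators"
require "ibm_watson/speech_to_text_v1"

# Placeholder credentials and URL, for illustration only.
authenticator = IBMWatson::Authenticators::IamAuthenticator.new(apikey: "{apikey}")
speech_to_text = IBMWatson::SpeechToTextV1.new(authenticator: authenticator)
speech_to_text.service_url = "{service_url}"

File.open("audio-file.flac") do |audio_file|
  response = speech_to_text.recognize(
    audio: audio_file,
    content_type: "audio/flac",
    end_of_phrase_silence_time: 0.3,     # split final results on pauses longer than 0.3 s
    split_transcript_at_phrase_end: true # also split at semantic phrase boundaries
  )
  response.result["results"].each do |r|
    puts r["alternatives"][0]["transcript"]
  end
end
```

Setting `end_of_phrase_silence_time: 0.3` makes the service break final results on shorter pauses than the 0.8-second default, and `split_transcript_at_phrase_end: true` layers semantic splits on top of the pause-based ones.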
@@ -407,7 +439,7 @@ def recognize(audio:, content_type: nil, model: nil, language_customization_id:
 end

 ##
-# @!method recognize_using_websocket(content_type: nil,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil)
+# @!method recognize_using_websocket(content_type: nil, recognize_callback:, audio: nil, chunk_data: false, model: nil, customization_id: nil, acoustic_customization_id: nil, customization_weight: nil, base_model_version: nil, inactivity_timeout: nil, interim_results: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
 # Sends audio for speech recognition using web sockets.
 # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
 # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
@@ -469,6 +501,36 @@ def recognize(audio:, content_type: nil, model: nil, language_customization_id:
 # input audio. The service returns audio metrics with the final transcription
 # results. By default, the service returns no audio metrics.
-# @return [WebSocketClient] Returns a new WebSocketClient object
+#
+# See [Audio
+# metrics](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
+# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
+# splits a transcript into multiple final results. If the service detects pauses or
+# extended silence before it reaches the end of the audio stream, its response can
+# include multiple final results. Silence indicates a point at which the speaker
+# pauses between spoken words or phrases.
+#
+# Specify a value for the pause interval in the range of 0.0 to 120.0.
+# * A value greater than 0 specifies the interval that the service is to use for
+# speech recognition.
+# * A value of 0 indicates that the service is to use the default interval. It is
+# equivalent to omitting the parameter.
+#
+# The default pause interval for most languages is 0.8 seconds; the default for
+# Chinese is 0.6 seconds.
+#
+# See [End of phrase silence
+# time](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#silence_time).
+# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
+# based on semantic features of the input, for example, at the conclusion of
+# meaningful phrases such as sentences. The service bases its understanding of
+# semantic features on the base language model that you use with a request. Custom
+# language models and grammars can also influence how and where the service splits a
+# transcript. By default, the service splits transcripts based solely on the pause
+# interval.
+#
+# See [Split transcript at phrase
+# end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
+# @return [WebSocketClient] Returns a new WebSocketClient object
 def recognize_using_websocket(
   content_type: nil,
   recognize_callback:,
@@ -495,7 +557,9 @@ def recognize_using_websocket(
   redaction: nil,
   processing_metrics: nil,
   processing_metrics_interval: nil,
-  audio_metrics: nil
+  audio_metrics: nil,
+  end_of_phrase_silence_time: nil,
+  split_transcript_at_phrase_end: nil
 )
   raise ArgumentError.new("Audio must be provided") if audio.nil? && !chunk_data
   raise ArgumentError.new("Recognize callback must be provided") if recognize_callback.nil?
@@ -532,7 +596,9 @@ def recognize_using_websocket(
532596 "redaction" => redaction ,
533597 "processing_metrics" => processing_metrics ,
534598 "processing_metrics_interval" => processing_metrics_interval ,
535- "audio_metrics" => audio_metrics
599+ "audio_metrics" => audio_metrics ,
600+ "end_of_phrase_silence_time" => end_of_phrase_silence_time ,
601+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
536602 }
537603 options . delete_if { |_ , v | v . nil? }
538604 WebSocketClient . new ( audio : audio , chunk_data : chunk_data , options : options , recognize_callback : recognize_callback , service_url : service_url , headers : headers , disable_ssl_verification : @disable_ssl_verification )
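
The same two options flow through the WebSocket interface. A sketch under the same assumptions as the earlier one (`speech_to_text` already constructed; `MyRecognizeCallback` is a hypothetical subclass of the gem's `RecognizeCallback`):

```ruby
require "ibm_watson/websocket/recognize_callback"

# Hypothetical callback for illustration: print each transcription event.
class MyRecognizeCallback < IBMWatson::RecognizeCallback
  def on_transcription(transcription:)
    puts transcription
  end

  def on_error(error:)
    warn "Error: #{error}"
  end
end

File.open("audio-file.flac") do |audio_file|
  ws = speech_to_text.recognize_using_websocket(
    audio: audio_file,
    content_type: "audio/flac",
    recognize_callback: MyRecognizeCallback.new,
    end_of_phrase_silence_time: 0.3,
    split_transcript_at_phrase_end: true
  )
  ws.start # blocks the calling thread; wrap in Thread.new if needed
end
```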
@@ -650,7 +716,7 @@ def unregister_callback(callback_url:)
 end

 ##
-# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil)
+# @!method create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
 # Create a job.
 # Creates a job for a new asynchronous recognition request. The job is owned by the
 # instance of the service whose credentials are used to create it. How you learn the
@@ -919,6 +985,9 @@ def unregister_callback(callback_url:)
 # the `processing_metrics_interval` parameter. It also returns processing metrics
 # for transcription events, for example, for final and interim results. By default,
 # the service returns no processing metrics.
+#
+# See [Processing
+# metrics](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
 # @param processing_metrics_interval [Float] Specifies the interval in real wall-clock seconds at which the service is to
 # return processing metrics. The parameter is ignored unless the
 # `processing_metrics` parameter is set to `true`.
@@ -930,11 +999,44 @@ def unregister_callback(callback_url:)
 # metrics only for transcription events instead of at periodic intervals, set the
 # value to a large number. If the value is larger than the duration of the audio,
 # the service returns processing metrics only for transcription events.
+#
+# See [Processing
+# metrics](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-metrics#processing_metrics).
 # @param audio_metrics [Boolean] If `true`, requests detailed information about the signal characteristics of the
 # input audio. The service returns audio metrics with the final transcription
 # results. By default, the service returns no audio metrics.
+#
+# See [Audio
+# metrics](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-metrics#audio_metrics).
+# @param end_of_phrase_silence_time [Float] Specifies the duration of the pause interval at which the service
+# splits a transcript into multiple final results. If the service detects pauses or
+# extended silence before it reaches the end of the audio stream, its response can
+# include multiple final results. Silence indicates a point at which the speaker
+# pauses between spoken words or phrases.
+#
+# Specify a value for the pause interval in the range of 0.0 to 120.0.
+# * A value greater than 0 specifies the interval that the service is to use for
+# speech recognition.
+# * A value of 0 indicates that the service is to use the default interval. It is
+# equivalent to omitting the parameter.
+#
+# The default pause interval for most languages is 0.8 seconds; the default for
+# Chinese is 0.6 seconds.
+#
+# See [End of phrase silence
+# time](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#silence_time).
+# @param split_transcript_at_phrase_end [Boolean] If `true`, directs the service to split the transcript into multiple final results
+# based on semantic features of the input, for example, at the conclusion of
+# meaningful phrases such as sentences. The service bases its understanding of
+# semantic features on the base language model that you use with a request. Custom
+# language models and grammars can also influence how and where the service splits a
+# transcript. By default, the service splits transcripts based solely on the pause
+# interval.
+#
+# See [Split transcript at phrase
+# end](https://cloud.ibm.com/docs/services/speech-to-text?topic=speech-to-text-output#split_transcript).
 # @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
-def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil)
+def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events: nil, user_token: nil, results_ttl: nil, language_customization_id: nil, acoustic_customization_id: nil, base_model_version: nil, customization_weight: nil, inactivity_timeout: nil, keywords: nil, keywords_threshold: nil, max_alternatives: nil, word_alternatives_threshold: nil, word_confidence: nil, timestamps: nil, profanity_filter: nil, smart_formatting: nil, speaker_labels: nil, customization_id: nil, grammar_name: nil, redaction: nil, processing_metrics: nil, processing_metrics_interval: nil, audio_metrics: nil, end_of_phrase_silence_time: nil, split_transcript_at_phrase_end: nil)
   raise ArgumentError.new("audio must be provided") if audio.nil?

   headers = {
@@ -969,7 +1071,9 @@ def create_job(audio:, content_type: nil, model: nil, callback_url: nil, events:
9691071 "redaction" => redaction ,
9701072 "processing_metrics" => processing_metrics ,
9711073 "processing_metrics_interval" => processing_metrics_interval ,
972- "audio_metrics" => audio_metrics
1074+ "audio_metrics" => audio_metrics ,
1075+ "end_of_phrase_silence_time" => end_of_phrase_silence_time ,
1076+ "split_transcript_at_phrase_end" => split_transcript_at_phrase_end
9731077 }
9741078
9751079 data = audio
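
For the asynchronous path, a sketch that submits a job with the new options and polls it with the existing `check_job` method; the client setup is the same hypothetical one as above, and the file name is a placeholder:

```ruby
File.open("audio-file.flac") do |audio_file|
  job = speech_to_text.create_job(
    audio: audio_file,
    content_type: "audio/flac",
    end_of_phrase_silence_time: 0.6,     # shorter pause interval than the 0.8 s default
    split_transcript_at_phrase_end: true
  ).result

  # Poll until the service reports a terminal status.
  until %w[completed failed].include?(job["status"])
    sleep 5
    job = speech_to_text.check_job(id: job["id"]).result
  end

  puts job["results"] if job["status"] == "completed"
end
```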
@@ -1104,9 +1208,9 @@ def delete_job(id:)
 # model is owned by the instance of the service whose credentials are used to create
 # it.
 #
-# You can create a maximum of 1024 custom language models, per credential. The
-# service returns an error if you attempt to create more than 1024 models. You do
-# not lose any models, but you cannot create any more until your model count is
+# You can create a maximum of 1024 custom language models per owning credentials.
+# The service returns an error if you attempt to create more than 1024 models. You
+# do not lose any models, but you cannot create any more until your model count is
 # below the limit.
 #
 # **See also:** [Create a custom language
@@ -2230,9 +2334,9 @@ def delete_grammar(customization_id:, grammar_name:)
 # model is owned by the instance of the service whose credentials are used to create
 # it.
 #
-# You can create a maximum of 1024 custom acoustic models, per credential. The
-# service returns an error if you attempt to create more than 1024 models. You
-# do not lose any models, but you cannot create any more until your model count is
+# You can create a maximum of 1024 custom acoustic models per owning credentials.
+# The service returns an error if you attempt to create more than 1024 models. You
+# do not lose any models, but you cannot create any more until your model count is
 # below the limit.
 #
 # **See also:** [Create a custom acoustic