Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/recognize-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class RecognizeStream extends Duplex {
* @param {number} [options.speechDetectorSensitivity] - The sensitivity of speech activity detection that the service is to perform
* @param {number} [options.backgroundAudioSuppression] - The level to which the service is to suppress background audio based on its volume to prevent it from being transcribed as speech
* @param {boolean} [options.lowLatency] - If `true` for next-generation `Multimedia` and `Telephony` models that support low latency, directs the service to produce results even more quickly than it usually does
* @param {number} [options.sadModule] - Detects speech boundaries within the audio stream with better performance, improved noise suppression, faster responsiveness, and increased accuracy.
* @constructor
*/
constructor(options: RecognizeStream.Options) {
Expand Down Expand Up @@ -182,7 +183,8 @@ class RecognizeStream extends Duplex {
'split_transcript_at_phrase_end',
'speech_detector_sensitivity',
'background_audio_suppression',
'low_latency'
'low_latency',
'sad_module'
];
const openingMessage = processUserParameters(options, openingMessageParamsAllowed);
openingMessage.action = 'start';
Expand Down
46 changes: 41 additions & 5 deletions speech-to-text/v1-generated.ts
Original file line number Diff line number Diff line change
Expand Up @@ -339,8 +339,9 @@ class SpeechToTextV1 extends BaseService {
* @param {boolean} [params.speechBeginEvent] - If `true`, the service returns a response object `SpeechActivity`
* which contains the time when a speech activity is detected in the stream. This can be used both in standard and low
* latency mode. This feature enables client applications to know that some words/speech has been detected and the
* service is in the process of decoding. This can be used in lieu of interim results in standard mode. See [Using
* speech recognition
* service is in the process of decoding. This can be used in lieu of interim results in standard mode. Use
* `sad_module: 2` to increase accuracy and performance in detecting speech boundaries within the audio stream. See
* [Using speech recognition
* parameters](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-service-features#features-parameters).
* @param {string} [params.languageCustomizationId] - The customization ID (GUID) of a custom language model that is
* to be used with the recognition request. The base model of the specified custom language model must match the model
Expand Down Expand Up @@ -508,6 +509,13 @@ class SpeechToTextV1 extends BaseService {
* sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
* and [Language model
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
* @param {number} [params.sadModule] - Detects speech boundaries within the audio stream with better performance,
* improved noise suppression, faster responsiveness, and increased accuracy.
*
* Specify `sad_module: 2`
*
* See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
* @param {number} [params.backgroundAudioSuppression] - The level to which the service is to suppress background
* audio based on its volume to prevent it from being transcribed as speech. Use the parameter to suppress side
* conversations or background noise.
Expand Down Expand Up @@ -561,7 +569,7 @@ class SpeechToTextV1 extends BaseService {
): Promise<SpeechToTextV1.Response<SpeechToTextV1.SpeechRecognitionResults>> {
const _params = { ...params };
const _requiredParams = ['audio'];
const _validParams = ['audio', 'contentType', 'model', 'speechBeginEvent', 'languageCustomizationId', 'acousticCustomizationId', 'baseModelVersion', 'customizationWeight', 'inactivityTimeout', 'keywords', 'keywordsThreshold', 'maxAlternatives', 'wordAlternativesThreshold', 'wordConfidence', 'timestamps', 'profanityFilter', 'smartFormatting', 'smartFormattingVersion', 'speakerLabels', 'grammarName', 'redaction', 'audioMetrics', 'endOfPhraseSilenceTime', 'splitTranscriptAtPhraseEnd', 'speechDetectorSensitivity', 'backgroundAudioSuppression', 'lowLatency', 'characterInsertionBias', 'signal', 'headers'];
const _validParams = ['audio', 'contentType', 'model', 'speechBeginEvent', 'languageCustomizationId', 'acousticCustomizationId', 'baseModelVersion', 'customizationWeight', 'inactivityTimeout', 'keywords', 'keywordsThreshold', 'maxAlternatives', 'wordAlternativesThreshold', 'wordConfidence', 'timestamps', 'profanityFilter', 'smartFormatting', 'smartFormattingVersion', 'speakerLabels', 'grammarName', 'redaction', 'audioMetrics', 'endOfPhraseSilenceTime', 'splitTranscriptAtPhraseEnd', 'speechDetectorSensitivity', 'sadModule', 'backgroundAudioSuppression', 'lowLatency', 'characterInsertionBias', 'signal', 'headers'];
const _validationErrors = validateParams(_params, _requiredParams, _validParams);
if (_validationErrors) {
return Promise.reject(_validationErrors);
Expand Down Expand Up @@ -592,6 +600,7 @@ class SpeechToTextV1 extends BaseService {
'end_of_phrase_silence_time': _params.endOfPhraseSilenceTime,
'split_transcript_at_phrase_end': _params.splitTranscriptAtPhraseEnd,
'speech_detector_sensitivity': _params.speechDetectorSensitivity,
'sad_module': _params.sadModule,
'background_audio_suppression': _params.backgroundAudioSuppression,
'low_latency': _params.lowLatency,
'character_insertion_bias': _params.characterInsertionBias,
Expand Down Expand Up @@ -1116,6 +1125,13 @@ class SpeechToTextV1 extends BaseService {
* sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
* and [Language model
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
* @param {number} [params.sadModule] - Detects speech boundaries within the audio stream with better performance,
* improved noise suppression, faster responsiveness, and increased accuracy.
*
* Specify `sad_module: 2`
*
* See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
* @param {number} [params.backgroundAudioSuppression] - The level to which the service is to suppress background
* audio based on its volume to prevent it from being transcribed as speech. Use the parameter to suppress side
* conversations or background noise.
Expand Down Expand Up @@ -1169,7 +1185,7 @@ class SpeechToTextV1 extends BaseService {
): Promise<SpeechToTextV1.Response<SpeechToTextV1.RecognitionJob>> {
const _params = { ...params };
const _requiredParams = ['audio'];
const _validParams = ['audio', 'contentType', 'model', 'callbackUrl', 'events', 'userToken', 'resultsTtl', 'languageCustomizationId', 'acousticCustomizationId', 'baseModelVersion', 'customizationWeight', 'inactivityTimeout', 'keywords', 'keywordsThreshold', 'maxAlternatives', 'wordAlternativesThreshold', 'wordConfidence', 'timestamps', 'profanityFilter', 'smartFormatting', 'smartFormattingVersion', 'speakerLabels', 'grammarName', 'redaction', 'processingMetrics', 'processingMetricsInterval', 'audioMetrics', 'endOfPhraseSilenceTime', 'splitTranscriptAtPhraseEnd', 'speechDetectorSensitivity', 'backgroundAudioSuppression', 'lowLatency', 'characterInsertionBias', 'signal', 'headers'];
const _validParams = ['audio', 'contentType', 'model', 'callbackUrl', 'events', 'userToken', 'resultsTtl', 'languageCustomizationId', 'acousticCustomizationId', 'baseModelVersion', 'customizationWeight', 'inactivityTimeout', 'keywords', 'keywordsThreshold', 'maxAlternatives', 'wordAlternativesThreshold', 'wordConfidence', 'timestamps', 'profanityFilter', 'smartFormatting', 'smartFormattingVersion', 'speakerLabels', 'grammarName', 'redaction', 'processingMetrics', 'processingMetricsInterval', 'audioMetrics', 'endOfPhraseSilenceTime', 'splitTranscriptAtPhraseEnd', 'speechDetectorSensitivity', 'sadModule', 'backgroundAudioSuppression', 'lowLatency', 'characterInsertionBias', 'signal', 'headers'];
const _validationErrors = validateParams(_params, _requiredParams, _validParams);
if (_validationErrors) {
return Promise.reject(_validationErrors);
Expand Down Expand Up @@ -1205,6 +1221,7 @@ class SpeechToTextV1 extends BaseService {
'end_of_phrase_silence_time': _params.endOfPhraseSilenceTime,
'split_transcript_at_phrase_end': _params.splitTranscriptAtPhraseEnd,
'speech_detector_sensitivity': _params.speechDetectorSensitivity,
'sad_module': _params.sadModule,
'background_audio_suppression': _params.backgroundAudioSuppression,
'low_latency': _params.lowLatency,
'character_insertion_bias': _params.characterInsertionBias,
Expand Down Expand Up @@ -4334,7 +4351,8 @@ namespace SpeechToTextV1 {
/** If `true`, the service returns a response object `SpeechActivity` which contains the time when a speech
* activity is detected in the stream. This can be used both in standard and low latency mode. This feature enables
* client applications to know that some words/speech has been detected and the service is in the process of
* decoding. This can be used in lieu of interim results in standard mode. See [Using speech recognition
* decoding. This can be used in lieu of interim results in standard mode. Use `sad_module: 2` to increase accuracy
* and performance in detecting speech boundaries within the audio stream. See [Using speech recognition
* parameters](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-service-features#features-parameters).
*/
speechBeginEvent?: boolean;
Expand Down Expand Up @@ -4541,6 +4559,15 @@ namespace SpeechToTextV1 {
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
*/
speechDetectorSensitivity?: number;
/** Detects speech boundaries within the audio stream with better performance, improved noise suppression,
* faster responsiveness, and increased accuracy.
*
* Specify `sad_module: 2`
*
* See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
*/
sadModule?: number;
/** The level to which the service is to suppress background audio based on its volume to prevent it from being
* transcribed as speech. Use the parameter to suppress side conversations or background noise.
*
Expand Down Expand Up @@ -5009,6 +5036,15 @@ namespace SpeechToTextV1 {
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
*/
speechDetectorSensitivity?: number;
/** Detects speech boundaries within the audio stream with better performance, improved noise suppression,
* faster responsiveness, and increased accuracy.
*
* Specify `sad_module: 2`
*
* See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
*/
sadModule?: number;
/** The level to which the service is to suppress background audio based on its volume to prevent it from being
* transcribed as speech. Use the parameter to suppress side conversations or background noise.
*
Expand Down
1 change: 1 addition & 0 deletions speech-to-text/v1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ namespace SpeechToTextV1 {
speechDetectorSensitivity?: number;
backgroundAudioSuppression?: number;
characterInsertionBias?: number;
sadModule?: number;
}
}

Expand Down
8 changes: 7 additions & 1 deletion test/unit/speech-to-text.v1.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright IBM Corp. 2018, 2024.
* (C) Copyright IBM Corp. 2025.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -295,6 +295,7 @@ describe('SpeechToTextV1', () => {
const endOfPhraseSilenceTime = 0.8;
const splitTranscriptAtPhraseEnd = false;
const speechDetectorSensitivity = 0.5;
const sadModule = 1;
const backgroundAudioSuppression = 0.0;
const lowLatency = false;
const characterInsertionBias = 0.0;
Expand Down Expand Up @@ -324,6 +325,7 @@ describe('SpeechToTextV1', () => {
endOfPhraseSilenceTime,
splitTranscriptAtPhraseEnd,
speechDetectorSensitivity,
sadModule,
backgroundAudioSuppression,
lowLatency,
characterInsertionBias,
Expand Down Expand Up @@ -368,6 +370,7 @@ describe('SpeechToTextV1', () => {
expect(mockRequestOptions.qs.end_of_phrase_silence_time).toEqual(endOfPhraseSilenceTime);
expect(mockRequestOptions.qs.split_transcript_at_phrase_end).toEqual(splitTranscriptAtPhraseEnd);
expect(mockRequestOptions.qs.speech_detector_sensitivity).toEqual(speechDetectorSensitivity);
expect(mockRequestOptions.qs.sad_module).toEqual(sadModule);
expect(mockRequestOptions.qs.background_audio_suppression).toEqual(backgroundAudioSuppression);
expect(mockRequestOptions.qs.low_latency).toEqual(lowLatency);
expect(mockRequestOptions.qs.character_insertion_bias).toEqual(characterInsertionBias);
Expand Down Expand Up @@ -636,6 +639,7 @@ describe('SpeechToTextV1', () => {
const endOfPhraseSilenceTime = 0.8;
const splitTranscriptAtPhraseEnd = false;
const speechDetectorSensitivity = 0.5;
const sadModule = 1;
const backgroundAudioSuppression = 0.0;
const lowLatency = false;
const characterInsertionBias = 0.0;
Expand Down Expand Up @@ -670,6 +674,7 @@ describe('SpeechToTextV1', () => {
endOfPhraseSilenceTime,
splitTranscriptAtPhraseEnd,
speechDetectorSensitivity,
sadModule,
backgroundAudioSuppression,
lowLatency,
characterInsertionBias,
Expand Down Expand Up @@ -719,6 +724,7 @@ describe('SpeechToTextV1', () => {
expect(mockRequestOptions.qs.end_of_phrase_silence_time).toEqual(endOfPhraseSilenceTime);
expect(mockRequestOptions.qs.split_transcript_at_phrase_end).toEqual(splitTranscriptAtPhraseEnd);
expect(mockRequestOptions.qs.speech_detector_sensitivity).toEqual(speechDetectorSensitivity);
expect(mockRequestOptions.qs.sad_module).toEqual(sadModule);
expect(mockRequestOptions.qs.background_audio_suppression).toEqual(backgroundAudioSuppression);
expect(mockRequestOptions.qs.low_latency).toEqual(lowLatency);
expect(mockRequestOptions.qs.character_insertion_bias).toEqual(characterInsertionBias);
Expand Down
Loading