
Commit abf765c

partial work towards supporting objectMode output
1 parent 1a4b430 commit abf765c

File tree

  dist/watson-speech.js
  examples/public/audio-element-programmatic.html
  examples/public/audio-element.html
  speech-to-text/format-stream.js
  speech-to-text/recognize-stream.js

5 files changed: +86 -51 lines changed

dist/watson-speech.js

Lines changed: 41 additions & 24 deletions
@@ -5808,19 +5808,13 @@ function FormatStream(opts) {
   this.opts = util._extend({
     model: '', // some models should have all spaces removed
     hesitation: '\u2026', // ellipsis
-    decodeStrings: true
+    decodeStrings: true,
+    objectMode: true
   }, opts);
   Transform.call(this, opts);

   this.isJaCn = ((this.opts.model.substring(0,5) === 'ja-JP') || (this.opts.model.substring(0,5) === 'zh-CN'));
-
-  var self = this;
-  this.on('pipe', function(source) {
-    source.on('result', self.handleResult.bind(self));
-    if(source.stop) {
-      self.stop = source.stop.bind(source);
-    }
-  });
+  this._transform = opts.objectMode ? this.formatResult : this.formatString;
 }
 util.inherits(FormatStream, Transform);

@@ -5872,7 +5866,7 @@ FormatStream.prototype.period = function period(text) {
   return text + (this.isJaCn ? '。' : '. ')
 };

-FormatStream.prototype._transform = function(chunk, encoding, next) {
+FormatStream.prototype.formatString = function(chunk, encoding, next) {
   this.push(this.period(this.capitalize(this.clean(chunk.toString()))));
   next();
 };
@@ -5882,7 +5876,7 @@ FormatStream.prototype._transform = function(chunk, encoding, next) {
  *
  * @param result
  */
-FormatStream.prototype.handleResult = function handleResult(result) {
+FormatStream.prototype.formatResult = function handleResult(result) {
   result = clone(result);
   result.alternatives = result.alternatives.map(function(alt) {
     alt.transcript = this.capitalize(this.clean(alt.transcript));
@@ -6368,20 +6362,21 @@ var QUERY_PARAMS_ALLOWED = ['model', 'watson-token']; //, 'X-Watson-Learning-Opt
  * @param options
  * @param {String} [options.model='en-US_BroadbandModel'] - voice model to use. Microphone streaming only supports broadband models.
  * @param {String} [options.url='wss://stream.watsonplatform.net/speech-to-text/api'] base URL for service
- * @param {String} [options.content-type='audio/wav'] - content type of audio; should be automatically determined in most cases
- * @param {Boolean} [options.interim_results=true] - Send back non-final previews of each "sentence" as it is being processed
+ * @param {String} [options.content-type='audio/wav'] - content type of audio; can be automatically determined from file header in most cases. only wav, flac, and ogg/opus are supported
+ * @param {Boolean} [options.interim_results=false] - Send back non-final previews of each "sentence" as it is being processed. Defaults to true when in objectMode.
  * @param {Boolean} [options.continuous=true] - set to false to automatically stop the transcription after the first "sentence"
- * @param {Boolean} [options.word_confidence=true] - include confidence scores with results
- * @param {Boolean} [options.timestamps=true] - include timestamps with results
- * @param {Number} [options.max_alternatives=3] - maximum number of alternative transcriptions to include
+ * @param {Boolean} [options.word_confidence=false] - include confidence scores with results. Defaults to true when in objectMode.
+ * @param {Boolean} [options.timestamps=false] - include timestamps with results. Defaults to true when in objectMode.
+ * @param {Number} [options.max_alternatives=1] - maximum number of alternative transcriptions to include. Defaults to 3 when in objectMode.
  * @param {Number} [options.inactivity_timeout=30] - how many seconds of silence before automatically closing the stream (even if continuous is true). use -1 for infinity
-
+ * @param {Boolean} [options.objectMode=false] - emit `result` objects instead of string Buffers for the `data` events. Changes several other defaults.
+ *
  * //todo: investigate other options at http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/apis/#!/speech-to-text/recognizeSessionless
  *
  * @constructor
  */
 function RecognizeStream(options) {
-  Duplex.call(this, options);
+  Duplex.call(this, {readableObjectMode: options && (options.objectMode)});
   this.options = options;
   this.listening = false;
   this.initialized = false;
@@ -6424,16 +6419,32 @@ RecognizeStream.prototype.initialize = function () {

   var url = (options.url || "wss://stream.watsonplatform.net/speech-to-text/api").replace(/^http/, 'ws') + '/v1/recognize?' + queryString;

+  // turn off all the extras if we're just outputting a string with a single final result
+  var defaults = {
+    interim_results: false,
+    word_confidence: false,
+    timestamps: false,
+    max_alternatives: 1
+  };
+
+  // but turn everything on if we're in objectMode and the end user can consume it
+  var objectModeDefaults = {
+    interim_results: true,
+    word_confidence: true,
+    timestamps: true,
+    max_alternatives: 3
+  };
+
   var openingMessage = util._extend({
     action: 'start',
     'content-type': 'audio/wav',
     continuous: true,
-    interim_results: true,
-    word_confidence: true,
-    timestamps: true,
     max_alternatives: 3,
     inactivity_timeout: 30
-  }, pick(options, OPENING_MESSAGE_PARAMS_ALLOWED));
+  },
+    options.objectMode ? objectModeDefaults : defaults,
+    pick(options, OPENING_MESSAGE_PARAMS_ALLOWED)
+  );


   var self = this;
@@ -6513,7 +6524,7 @@ RecognizeStream.prototype.initialize = function () {
        * Object with interim or final results, including possible alternatives. May have no results at all for empty audio files.
        * @event RecognizeStream#results
        * @param {Object} results
-       * @deprecated - use the 'result' event (singular) instead
+       * @deprecated - use objectMode instead
        */
       self.emit('results', data.results);

@@ -6523,6 +6534,7 @@ RecognizeStream.prototype.initialize = function () {
          * Object with interim or final results, including possible alternatives. May have no results at all for empty audio files.
          * @event RecognizeStream#results
          * @param {Object} results
+         * @deprecated - use objectMode instead
          */
         result.index = data.result_index;
         self.emit('result', result);
@@ -6532,7 +6544,12 @@ RecognizeStream.prototype.initialize = function () {
          * @event RecognizeStream#data
          * @param {String} transcript
          */
-        self.push(result.alternatives[0].transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams
+        if (options.objectMode) {
+          self.push(result); // this is the "data" event that can be easily piped to other streams
+        } else {
+          self.push(result.alternatives[0].transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams
+        }
+
       }
     });
   } else {
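Note: a minimal usage sketch of what this change means for consumers, mirroring the recognizeElement examples later in this commit. The `token` and audio element are assumed to be set up as in those examples, and the objectMode option is assumed to be forwarded through to RecognizeStream. Without objectMode the `data` events carry transcript strings; with it they carry the result objects themselves (the updated example page relies on this to show interim results).

// Default (string) mode: each sentence arrives as a transcript chunk.
var textStream = WatsonSpeech.SpeechToText.recognizeElement({
  token: token,
  element: document.getElementById('audio-element')
});
textStream.on('data', function(transcript) {
  console.log(transcript.toString());
});

// objectMode: 'data' events carry the result objects, and the richer per-result
// options (interim_results, word_confidence, timestamps) now default to on.
var objectStream = WatsonSpeech.SpeechToText.recognizeElement({
  token: token,
  element: document.getElementById('audio-element'),
  objectMode: true
});
objectStream.on('data', function(result) {
  console.log(result.alternatives[0].transcript);
});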

examples/public/audio-element-programmatic.html

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ <h2>Code for this demo:</h2>
   $output.html('');

   var audioElement = new Audio(); // document.createElement('video'); also works here
-  audioElement.src = "audio.wav";
+  audioElement.src = "Us_English_Broadband_Sample_1.wav";

   var stream = WatsonSpeech.SpeechToText.recognizeElement({
     token: token,

examples/public/audio-element.html

Lines changed: 3 additions & 2 deletions
@@ -32,15 +32,16 @@ <h2>Code for this demo:</h2>

   var stream = WatsonSpeech.SpeechToText.recognizeElement({
     token: token,
-    element: $('#audio-element')[0]
+    element: $('#audio-element')[0],
+    objectMode: true, // necessary to receive interim results
     // muteSource: true // prevents sound from also playing locally
   });

   // each result gets it's own <span> because watson will sometimes go back and change a word as it hears more context
   var $curSentence = $('<span>&nbsp;</span>').appendTo($output);

   // a result is approximately equivalent to a sentence
-  stream.on('result', function(result) {
+  stream.on('data', function(result) {
     // update the text for the current sentence with the default alternative.
     // there may be multiple alternatives but this example app ignores all but the first.
     $curSentence.html(result.alternatives[0].transcript);

speech-to-text/format-stream.js

Lines changed: 5 additions & 11 deletions
@@ -20,19 +20,13 @@ function FormatStream(opts) {
   this.opts = util._extend({
     model: '', // some models should have all spaces removed
     hesitation: '\u2026', // ellipsis
-    decodeStrings: true
+    decodeStrings: true,
+    objectMode: true
   }, opts);
   Transform.call(this, opts);

   this.isJaCn = ((this.opts.model.substring(0,5) === 'ja-JP') || (this.opts.model.substring(0,5) === 'zh-CN'));
-
-  var self = this;
-  this.on('pipe', function(source) {
-    source.on('result', self.handleResult.bind(self));
-    if(source.stop) {
-      self.stop = source.stop.bind(source);
-    }
-  });
+  this._transform = opts.objectMode ? this.formatResult : this.formatString;
 }
 util.inherits(FormatStream, Transform);

@@ -84,7 +78,7 @@ FormatStream.prototype.period = function period(text) {
   return text + (this.isJaCn ? '。' : '. ')
 };

-FormatStream.prototype._transform = function(chunk, encoding, next) {
+FormatStream.prototype.formatString = function(chunk, encoding, next) {
   this.push(this.period(this.capitalize(this.clean(chunk.toString()))));
   next();
 };
@@ -94,7 +88,7 @@ FormatStream.prototype._transform = function(chunk, encoding, next) {
  *
  * @param result
  */
-FormatStream.prototype.handleResult = function handleResult(result) {
+FormatStream.prototype.formatResult = function handleResult(result) {
   result = clone(result);
   result.alternatives = result.alternatives.map(function(alt) {
     alt.transcript = this.capitalize(this.clean(alt.transcript));
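Note: a minimal sketch of how the reworked FormatStream constructor is meant to be used. It assumes this module exports FormatStream and that formatResult, whose full body is not shown in this hunk, behaves like a normal _transform (pushes the formatted clone and signals completion); everything else is illustrative. Instead of listening for 'result' events on the piped source, the stream now formats whatever is written to it, selecting formatResult or formatString from the objectMode option.

var FormatStream = require('./format-stream.js');

// String mode: chunks are plain transcript strings, handled by formatString.
var stringFormatter = new FormatStream({ objectMode: false });
stringFormatter.on('data', function(chunk) {
  console.log(chunk.toString()); // cleaned, capitalized, period-terminated text
});
stringFormatter.end('it was a dark and stormy night');

// Object mode: chunks are whole result objects, handled by formatResult, which
// clones the result and formats each alternative's transcript.
var objectFormatter = new FormatStream({ objectMode: true });
objectFormatter.on('data', function(result) {
  console.log(result.alternatives[0].transcript);
});
// A hand-rolled result object standing in for RecognizeStream output.
objectFormatter.end({ alternatives: [{ transcript: 'it was a dark and stormy night' }] });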

speech-to-text/recognize-stream.js

Lines changed: 36 additions & 13 deletions
@@ -40,20 +40,21 @@ var QUERY_PARAMS_ALLOWED = ['model', 'watson-token']; //, 'X-Watson-Learning-Opt
  * @param options
  * @param {String} [options.model='en-US_BroadbandModel'] - voice model to use. Microphone streaming only supports broadband models.
  * @param {String} [options.url='wss://stream.watsonplatform.net/speech-to-text/api'] base URL for service
- * @param {String} [options.content-type='audio/wav'] - content type of audio; should be automatically determined in most cases
- * @param {Boolean} [options.interim_results=true] - Send back non-final previews of each "sentence" as it is being processed
+ * @param {String} [options.content-type='audio/wav'] - content type of audio; can be automatically determined from file header in most cases. only wav, flac, and ogg/opus are supported
+ * @param {Boolean} [options.interim_results=false] - Send back non-final previews of each "sentence" as it is being processed. Defaults to true when in objectMode.
  * @param {Boolean} [options.continuous=true] - set to false to automatically stop the transcription after the first "sentence"
- * @param {Boolean} [options.word_confidence=true] - include confidence scores with results
- * @param {Boolean} [options.timestamps=true] - include timestamps with results
- * @param {Number} [options.max_alternatives=3] - maximum number of alternative transcriptions to include
+ * @param {Boolean} [options.word_confidence=false] - include confidence scores with results. Defaults to true when in objectMode.
+ * @param {Boolean} [options.timestamps=false] - include timestamps with results. Defaults to true when in objectMode.
+ * @param {Number} [options.max_alternatives=1] - maximum number of alternative transcriptions to include. Defaults to 3 when in objectMode.
  * @param {Number} [options.inactivity_timeout=30] - how many seconds of silence before automatically closing the stream (even if continuous is true). use -1 for infinity
-
+ * @param {Boolean} [options.objectMode=false] - emit `result` objects instead of string Buffers for the `data` events. Changes several other defaults.
+ *
  * //todo: investigate other options at http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/apis/#!/speech-to-text/recognizeSessionless
  *
  * @constructor
  */
 function RecognizeStream(options) {
-  Duplex.call(this, options);
+  Duplex.call(this, {readableObjectMode: options && (options.objectMode)});
   this.options = options;
   this.listening = false;
   this.initialized = false;
@@ -96,16 +97,32 @@ RecognizeStream.prototype.initialize = function () {

   var url = (options.url || "wss://stream.watsonplatform.net/speech-to-text/api").replace(/^http/, 'ws') + '/v1/recognize?' + queryString;

+  // turn off all the extras if we're just outputting a string with a single final result
+  var defaults = {
+    interim_results: false,
+    word_confidence: false,
+    timestamps: false,
+    max_alternatives: 1
+  };
+
+  // but turn everything on if we're in objectMode and the end user can consume it
+  var objectModeDefaults = {
+    interim_results: true,
+    word_confidence: true,
+    timestamps: true,
+    max_alternatives: 3
+  };
+
   var openingMessage = util._extend({
     action: 'start',
     'content-type': 'audio/wav',
     continuous: true,
-    interim_results: true,
-    word_confidence: true,
-    timestamps: true,
     max_alternatives: 3,
     inactivity_timeout: 30
-  }, pick(options, OPENING_MESSAGE_PARAMS_ALLOWED));
+  },
+    options.objectMode ? objectModeDefaults : defaults,
+    pick(options, OPENING_MESSAGE_PARAMS_ALLOWED)
+  );


   var self = this;
@@ -185,7 +202,7 @@ RecognizeStream.prototype.initialize = function () {
        * Object with interim or final results, including possible alternatives. May have no results at all for empty audio files.
        * @event RecognizeStream#results
        * @param {Object} results
-       * @deprecated - use the 'result' event (singular) instead
+       * @deprecated - use objectMode instead
        */
       self.emit('results', data.results);

@@ -195,6 +212,7 @@ RecognizeStream.prototype.initialize = function () {
          * Object with interim or final results, including possible alternatives. May have no results at all for empty audio files.
          * @event RecognizeStream#results
          * @param {Object} results
+         * @deprecated - use objectMode instead
          */
         result.index = data.result_index;
         self.emit('result', result);
@@ -204,7 +222,12 @@ RecognizeStream.prototype.initialize = function () {
          * @event RecognizeStream#data
          * @param {String} transcript
          */
-        self.push(result.alternatives[0].transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams
+        if (options.objectMode) {
+          self.push(result); // this is the "data" event that can be easily piped to other streams
+        } else {
+          self.push(result.alternatives[0].transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams
+        }
+
       }
     });
   } else {
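Note: the opening message above is built by layering three objects: the base values, then the per-mode defaults, then the caller's whitelisted options. A minimal sketch of the intended precedence, illustrated with Object.assign rather than the util._extend call from the diff (the caller options here are hypothetical):

// Base values sent in every mode.
var base = {
  action: 'start',
  'content-type': 'audio/wav',
  continuous: true,
  max_alternatives: 3,
  inactivity_timeout: 30
};

// Everything off when the output is a plain string of final results only...
var defaults = { interim_results: false, word_confidence: false, timestamps: false, max_alternatives: 1 };
// ...everything on when the caller consumes result objects.
var objectModeDefaults = { interim_results: true, word_confidence: true, timestamps: true, max_alternatives: 3 };

// Hypothetical caller options; pick(options, OPENING_MESSAGE_PARAMS_ALLOWED)
// keeps only the whitelisted keys, represented here by the trailing object.
var options = { objectMode: true, timestamps: false };

var openingMessage = Object.assign({}, base,
  options.objectMode ? objectModeDefaults : defaults,
  { timestamps: options.timestamps });

// openingMessage: { action: 'start', 'content-type': 'audio/wav', continuous: true,
//   max_alternatives: 3, inactivity_timeout: 30, interim_results: true,
//   word_confidence: true, timestamps: false }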
