Skip to content

Commit 292892c

Browse files
committed
returning complete json from recognize stream, added result-extractor stream for backwards-compatibility
1 parent d295105 commit 292892c

16 files changed

+631
-285
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Changelog
22

3+
4+
### v0.22.0
5+
* Breaking: RecognizeStream now emits the original JSON message rather than the extracted results objects.
6+
* New ResultExtractor stream that can provide the old behavior
7+
* New `extractResults` option on recognizeFile/recognizeMicrophone enables this.
8+
* Removed deprecated `result` and `results` events from RecognizeStream.
9+
* Removed `receive-json` event from RecognizeStream because it now duplicates the behavior of the `data` event.
10+
11+
312
### v0.21.0
413
* Made FormatStream formatting methods available outside of streaming interface
514

dist/watson-speech.js

Lines changed: 270 additions & 132 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/static/index.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ <h2>Speech to Text</h2>
1313
<li><a href="microphone-alternatives.html">Transcribe from Microphone, with Alternatives</a></li>
1414
<li><a href="microphone-word-confidence.html">Transcribe from Microphone, with Word Confidence</a></li>
1515
<li><a href="microphone-streaming-text-to-console.html">Transcribe from Microphone, send text to console</a></li>
16-
<li><a href="microphone-streaming-object-to-console.html">Transcribe from Microphone, send JSON to console (includes text and metadata)</a></li>
16+
<li><a href="microphone-streaming-object-to-console.html">Transcribe from Microphone, send JSON to console (includes text and metadata; v0.22+ format)</a></li>
17+
<li><a href="microphone-streaming-object-extracted-to-console.html">Transcribe from Microphone, send JSON to console with results extracted (pre-v0.22 format)</a></li>
1718
<li><a href="microphone-streaming-model.html">Transcribe from Microphone, Streaming with chosen model</a></li>
1819
<li><a href="file-streaming.html">Transcribe from file, Streaming</a></li>
1920
<li><a href="file-realtime-vs-no-realtime.html">Transcribe from file, Comparing <code>{realtime: true}</code> to <code>{realtime: false}</code></a></li>
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>Watson Speech to Text client example</title>
6+
</head>
7+
<body>
8+
9+
<section>
10+
<h2>Transcribe from Microphone</h2>
11+
<button id="button">Start Microphone Transcription</button>
12+
<button id="stop">Stop</button>
13+
14+
<h2>Output:</h2>
15+
<div id="output">Open your browser's console to view the output. The results are simplified by removing the outer JSON (this was the default behavior before v0.22).</div>
16+
</section>
17+
18+
<script src="watson-speech.js"></script>
19+
<!-- window.fetch polyfill for IE/Edge & Older Chrome/FireFox -->
20+
<script src="bower_components/fetch/fetch.js"></script>
21+
22+
<h2>Code for this demo:</h2>
23+
24+
<pre><code><script style="display: block;">
25+
document.querySelector('#button').onclick = function () {
26+
27+
fetch('/api/speech-to-text/token')
28+
.then(function(response) {
29+
return response.text();
30+
}).then(function (token) {
31+
32+
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
33+
token: token,
34+
objectMode: true, // send objects instead of text
35+
extractResults: true // convert {results: [{alternatives:[...]}], result_index: 0} to {alternatives: [...], index: 0}
36+
});
37+
38+
stream.on('data', function(data) {
39+
console.log(data);
40+
});
41+
42+
stream.on('error', function(err) {
43+
console.log(err);
44+
});
45+
46+
document.querySelector('#stop').onclick = stream.stop.bind(stream);
47+
48+
}).catch(function(error) {
49+
console.log(error);
50+
});
51+
};
52+
53+
</script></code></pre>
54+
55+
</body>
56+
</html>

examples/static/microphone-streaming-text-to-console.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ <h2>Code for this demo:</h2>
3030
}).then(function (token) {
3131

3232
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
33-
token: token
33+
token: token,
34+
speaker_labels: true
3435
});
3536

3637
stream.setEncoding('utf8'); // get text instead of Buffers for on data events

speech-to-text/format-stream.js

Lines changed: 27 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -113,32 +113,36 @@ FormatStream.prototype.formatString = function(str, isInterim) {
113113
*
114114
* May be used outside of Node.js streams
115115
*
116-
* @param {Object} result
116+
* @param {Object} data
117117
* @returns {Object}
118118
*/
119-
FormatStream.prototype.formatResult = function formatResult(result) {
120-
result = clone(result);
121-
result.alternatives = result.alternatives.map(function(alt) {
122-
alt.transcript = this.formatString(alt.transcript, !result.final);
123-
if (alt.timestamps) {
124-
alt.timestamps = alt.timestamps.map(function(ts, i, arr) {
125-
// timestamps is an array of arrays, each sub-array is in the form ["word", startTime, endTime]'
126-
ts[0] = this.clean(ts[0]);
127-
if (i === 0) {
128-
ts[0] = this.capitalize(ts[0]);
119+
FormatStream.prototype.formatResult = function formatResult(data) {
120+
data = clone(data);
121+
if (Array.isArray(data.results)) {
122+
data.results.forEach(function(result) {
123+
result.alternatives = result.alternatives.map(function(alt) {
124+
alt.transcript = this.formatString(alt.transcript, !result.final);
125+
if (alt.timestamps) {
126+
alt.timestamps = alt.timestamps.map(function(ts, i, arr) {
127+
// timestamps is an array of arrays, each sub-array is in the form ["word", startTime, endTime]'
128+
ts[0] = this.clean(ts[0]);
129+
if (i === 0) {
130+
ts[0] = this.capitalize(ts[0]);
131+
}
132+
if (i === arr.length - 1 && result.final) {
133+
ts[0] = this.period(ts[0]);
134+
}
135+
return ts;
136+
}, this).filter(function(ts) {
137+
return ts[0]; // remove any timestamps without a word (due to cleaning out junk words)
138+
139+
});
129140
}
130-
if (i === arr.length - 1 && result.final) {
131-
ts[0] = this.period(ts[0]);
132-
}
133-
return ts;
134-
}, this).filter(function(ts) {
135-
return ts[0]; // remove any timestamps without a word (due to cleaning out junk words)
136-
137-
});
138-
}
139-
return alt;
140-
}, this);
141-
return result;
141+
return alt;
142+
}, this);
143+
}, this);
144+
}
145+
return data;
142146
};
143147

144148
FormatStream.prototype.promise = require('./to-promise');

speech-to-text/recognize-file.js

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ var FormatStream = require('./format-stream.js');
2222
var TimingStream = require('./timing-stream.js');
2323
var assign = require('object.assign/polyfill')();
2424
var WritableElementStream = require('./writable-element-stream');
25+
var ResultExtractor = require('./result-extractor');
2526

2627
/**
2728
* @module watson-speech/speech-to-text/recognize-file
@@ -38,6 +39,7 @@ var WritableElementStream = require('./writable-element-stream');
3839
* @param {Boolean} [options.format=true] - pipe the text through a {FormatStream} which performs light formatting. Also controls smart_formatting option unless explicitly set.
3940
* @param {Boolean} [options.realtime=options.play] - pipe the text through a {TimingStream} which slows the output down to real-time to match the audio playback.
4041
* @param {String|DOMElement} [options.outputElement] pipe the text to a WriteableElementStream targeting the specified element. Also defaults objectMode to true to enable interim results.
42+
* @param {Boolean} [options.extractResults=false] pipe results through a ResultExtractor stream to simplify the objects. (Default behavior before v0.22) Requires objectMode.
4143
*
4244
* @returns {RecognizeStream|FormatStream|TimingStream}
4345
*/
@@ -50,6 +52,10 @@ module.exports = function recognizeFile(options) { // eslint-disable-line comple
5052
if (options.outputElement && options.objectMode !== false) {
5153
options.objectMode = true;
5254
}
55+
// the ResultExtractor only works in objectMode
56+
if (options.extractResults && options.objectMode !== false) {
57+
options.objectMode = true;
58+
}
5359

5460
// default format to true (capitals and periods)
5561
// default smart_formatting to options.format value (dates, currency, etc.)
@@ -97,6 +103,17 @@ module.exports = function recognizeFile(options) { // eslint-disable-line comple
97103
stream.pipe(new WritableElementStream(options));
98104
}
99105

106+
if(options.extractResults) {
107+
var stop = stream.stop.bind(stream);
108+
stream = stream.pipe(new ResultExtractor());
109+
stream.stop = stop;
110+
}
111+
112+
// Capture error from original RecognizeStream
113+
if (stream !== recognizeStream) {
114+
recognizeStream.on('error', stream.emit.bind(stream, 'error'));
115+
}
116+
100117
return stream;
101118
};
102119

speech-to-text/recognize-microphone.js

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ var FormatStream = require('./format-stream.js');
2323
var assign = require('object.assign/polyfill')();
2424
var WritableElementStream = require('./writable-element-stream');
2525
var Writable = require('stream').Writable;
26+
var ResultExtractor = require('./result-extractor');
2627

2728
var preservedMicStream;
2829
var bitBucket = new Writable({
@@ -46,6 +47,7 @@ var bitBucket = new Writable({
4647
* @param {Boolean} [options.format=true] - pipe the text through a {FormatStream} which performs light formatting. Also controls smart_formatting option unless explicitly set.
4748
* @param {Boolean} [options.keepMicrophone=false] - keeps an internal reference to the microphone stream to reuse in subsequent calls (prevents multiple permissions dialogs in firefox)
4849
* @param {String|DOMElement} [options.outputElement] pipe the text to a WriteableElementStream targeting the specified element. Also defaults objectMode to true to enable interim results.
50+
* @param {Boolean} [options.extractResults=false] pipe results through a ResultExtractor stream to simplify the objects. (Default behavior before v0.22) Requires objectMode.
4951
*
5052
* @returns {RecognizeStream|FormatStream}
5153
*/
@@ -58,6 +60,10 @@ module.exports = function recognizeMicrophone(options) {
5860
if (options.outputElement && options.objectMode !== false) {
5961
options.objectMode = true;
6062
}
63+
// the ResultExtractor only works in objectMode
64+
if (options.extractResults && options.objectMode !== false) {
65+
options.objectMode = true;
66+
}
6167

6268
// default format to true (capitals and periods)
6369
// default smart_formatting to options.format value (dates, currency, etc.)
@@ -97,13 +103,16 @@ module.exports = function recognizeMicrophone(options) {
97103
var stream = recognizeStream;
98104
if (options.format) {
99105
stream = stream.pipe(new FormatStream(options));
100-
stream.stop = recognizeStream.stop.bind(recognizeStream);
101106
}
102107

103108
if (options.outputElement) {
104109
stream.pipe(new WritableElementStream(options));
105110
}
106111

112+
if(options.extractResults) {
113+
stream = stream.pipe(new ResultExtractor());
114+
}
115+
107116
getMicStream.catch(function(err) {
108117
stream.emit('error', err);
109118
});
@@ -144,6 +153,7 @@ module.exports = function recognizeMicrophone(options) {
144153
// Capture error from original RecognizeStream
145154
if (stream !== recognizeStream) {
146155
recognizeStream.on('error', stream.emit.bind(stream, 'error'));
156+
stream.stop = recognizeStream.stop.bind(recognizeStream);
147157
}
148158

149159
return stream;

speech-to-text/recognize-stream.js

Lines changed: 39 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,27 @@ var contentType = require('./content-type');
2525
var defaults = require('defaults');
2626
var qs = require('../util/querystring.js');
2727

28-
var OPENING_MESSAGE_PARAMS_ALLOWED = ['continuous', 'max_alternatives', 'timestamps', 'word_confidence', 'inactivity_timeout',
29-
'content-type', 'interim_results', 'keywords', 'keywords_threshold', 'word_alternatives_threshold', 'profanity_filter', 'smart_formatting'];
30-
31-
var QUERY_PARAMS_ALLOWED = ['customization_id','model', 'watson-token']; // , 'X-Watson-Learning-Opt-Out' - should be allowed but currently isn't due to a service bug
28+
var OPENING_MESSAGE_PARAMS_ALLOWED = [
29+
'continuous',
30+
'inactivity_timeout',
31+
'timestamps',
32+
'word_confidence',
33+
'content-type',
34+
'interim_results',
35+
'keywords',
36+
'keywords_threshold',
37+
'max_alternatives',
38+
'word_alternatives_threshold',
39+
'profanity_filter',
40+
'smart_formatting'
41+
];
42+
43+
var QUERY_PARAMS_ALLOWED = [
44+
'customization_id',
45+
'model',
46+
'watson-token',
47+
'X-Watson-Learning-Opt-Out'
48+
];
3249

3350

3451
/**
@@ -123,7 +140,7 @@ var QUERY_PARAMS_ALLOWED = ['customization_id','model', 'watson-token']; // , 'X
123140
* @param {Boolean} [options.profanity_filter=false] - set to true to filter out profanity and replace the words with *'s
124141
* @param {Number} [options.inactivity_timeout=30] - how many seconds of silence before automatically closing the stream (even if continuous is true). use -1 for infinity
125142
* @param {Boolean} [options.readableObjectMode=false] - emit the original JSON message objects (e.g. `{results: [...], result_index: 0}`) instead of string Buffers for the `data` events. Changes several other defaults.
126-
* @param {Number} [options.X-WDC-PL-OPT-OUT=0] - set to 1 to opt-out of allowing Watson to use this request to improve it's services
143+
* @param {Boolean} [options.X-Watson-Learning-Opt-Out=false] - set to true to opt-out of allowing Watson to use this request to improve its services
127144
* @param {Boolean} [options.smart_formatting=false] - formats numeric values such as dates, times, currency, etc.
128145
* @param {String} [options.customization_id] - not yet supported on the public STT service
129146
*
@@ -278,12 +295,6 @@ RecognizeStream.prototype.initialize = function() {
278295
return emitError('Invalid JSON received from service:', frame, jsonEx);
279296
}
280297

281-
/**
282-
* @event RecognizeStream#receive-json
283-
* @param {Object} msg - the raw JSON received from Watson - sometimes useful for debugging
284-
*/
285-
self.emit('receive-json', data);
286-
287298
if (data.error) {
288299
emitError(data.error, frame);
289300
} else if (data.state === 'listening') {
@@ -296,43 +307,26 @@ RecognizeStream.prototype.initialize = function() {
296307
self.listening = true;
297308
self.emit('listening');
298309
}
299-
} else if (data.results) {
300-
/**
301-
* Object with array of interim or final results, possibly including confidence scores, alternatives, and word timing. May have no results at all for empty audio files.
302-
* @event RecognizeStream#results
303-
* @param {Object} results
304-
* @deprecated - use objectMode and listen for the 'data' event instead
305-
*/
306-
self.emit('results', data.results);
307-
308-
// note: currently there is always either 0 or 1 entries in the results array. However, this may change in the future.
309-
data.results.forEach(function(result) {
310-
result.index = data.result_index;
310+
} else {
311+
if (options.objectMode || options.readableObjectMode) {
311312
/**
312313
* Object with interim or final results, possibly including confidence scores, alternatives, and word timing.
313-
* @event RecognizeStream#results
314-
* @param {Object} results
315-
* @deprecated - use objectMode and listen for the 'data' event instead
314+
* @event RecognizeStream#data
315+
* @param {Object} data
316316
*/
317-
self.emit('result', result);
318-
if (options.objectMode || options.readableObjectMode) {
319-
/**
320-
* Object with interim or final results, possibly including confidence scores, alternatives, and word timing.
321-
* @event RecognizeStream#data
322-
* @param {Object} data
323-
*/
324-
self.push(result);
325-
} else if (result.final && result.alternatives) {
326-
/**
327-
* Finalized text
328-
* @event RecognizeStream#data
329-
* @param {String} transcript
330-
*/
331-
self.push(result.alternatives[0].transcript, 'utf8');
332-
}
333-
});
334-
} else {
335-
emitError('Unrecognised message from server', frame);
317+
self.push(data);
318+
} else if (Array.isArray(data.results)) {
319+
data.results.forEach(function(result) {
320+
if (result.final && result.alternatives) {
321+
/**
322+
* Finalized text
323+
* @event RecognizeStream#data
324+
* @param {String} transcript
325+
*/
326+
self.push(result.alternatives[0].transcript, 'utf8');
327+
}
328+
});
329+
}
336330
}
337331
};
338332

0 commit comments

Comments
 (0)