Skip to content

Commit 4221d97

Browse files
committed
speaker_labels support
1 parent 292892c commit 4221d97

16 files changed

+2259
-19
lines changed

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
* Breaking: RecognizeStream now emits the original JSON message rather than the extracted results objects.
66
* New ResultExtractor stream that can provide the old behavior
77
* New `extractResults` option on recognizeFile/recognizeMicrophone enables this.
8-
* Removed derecated `result` and `results` events from RecognizeStream.
8+
* Removed deprecated `result` and `results` events from RecognizeStream.
99
* Removed `receive-json` event from RecognizeStream because it now duplicates the behavior of the `data` event.
10-
10+
* Added support for `speaker_labels` option in RecognizeStream, updated other streams to handle speaker_labels correctly
11+
* Added a simple speaker_labels stream-to-console example
12+
* Added support for `X-Watson-Learning-Opt-Out` option in RecognizeStream
1113

1214
### v0.21.0
1315
* Made FormatStream formatting methods available outside of streaming interface

dist/watson-speech.js

Lines changed: 44 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Binary file not shown.

examples/static/index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ <h2>Speech to Text</h2>
1717
<li><a href="microphone-streaming-object-extracted-to-console.html">Transcribe from Microphone, send JSON to console with results extracted (pre-v0.22 format)</a></li>
1818
<li><a href="microphone-streaming-model.html">Transcribe from Microphone, Streaming with chosen model</a></li>
1919
<li><a href="file-streaming.html">Transcribe from file, Streaming</a></li>
20+
<li><a href="multi-speaker-file-console.html">Transcribe from file, multiple speakers</a></li>
2021
<li><a href="file-realtime-vs-no-realtime.html">Transcribe from file, Comparing <code>{realtime: true}</code> to <code>{realtime: false}</code></a></li>
2122
<li><a href="file-promise.html">Transcribe from file, Promise</a></li>
2223
<li><a href="file-ajax.html">Transcribe from file loaded over AJAX</a></li>

examples/static/microphone-streaming-object-extracted-to-console.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ <h2>Code for this demo:</h2>
3232
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
3333
token: token,
3434
objectMode: true, // send objects instead of text
35-
extractResults: true // convert {results: [{alternatives:[...]}], result_index: 0} to {alternatives: [...], index: 0}
35+
extractResults: true, // convert {results: [{alternatives:[...]}], result_index: 0} to {alternatives: [...], index: 0}
36+
format: false // optional - performs basic formatting on the results such as capitals and periods
3637
});
3738

3839
stream.on('data', function(data) {

examples/static/microphone-streaming-object-to-console.html

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ <h2>Code for this demo:</h2>
3131

3232
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
3333
token: token,
34-
objectMode: true // send objects instead of text
34+
objectMode: true, // send objects instead of text
35+
format: false // optional - performs basic formatting on the results such as capitals and periods
36+
3537
});
3638

3739
stream.on('data', function(data) {
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>Watson Speech to Text client example</title>
6+
</head>
7+
<body>
8+
9+
<section>
10+
<h2>Transcribe from File, multiple speakers</h2>
11+
<button id="button">Transcribe File</button>
12+
<button id="stop">Stop</button>
13+
14+
<h2>Output:</h2>
15+
<div id="output">Open your browser's console to view the output.</div>
16+
</section>
17+
18+
<script src="watson-speech.js"></script>
19+
<!-- window.fetch polyfill for IE/Edge & older Chrome/Firefox -->
20+
<script src="bower_components/fetch/fetch.js"></script>
21+
22+
<h2>Code for this demo:</h2>
23+
24+
<pre><code><script style="display: block;">
25+
26+
// preloading the data for a smoother experience
27+
var preloadTokenAndAudio = Promise.all([
28+
fetch('/api/speech-to-text/token').then(function(response) {
29+
return response.text();
30+
}),
31+
fetch('/en-us-multi-speaker-narrowband.wav').then(function(response) {
32+
return response.blob();
33+
})
34+
]);
35+
36+
document.querySelector('#button').onclick = function () {
37+
preloadTokenAndAudio.then(function (values) {
38+
var token = values[0];
39+
var file = values[1];
40+
41+
var stream = WatsonSpeech.SpeechToText.recognizeFile({
42+
token: token,
43+
data: file,
44+
speaker_labels: true,
45+
// only certain models support speaker labels currently,
46+
// see http://www.ibm.com/watson/developercloud/doc/speech-to-text/output.shtml#speaker_labels
47+
model: 'en-US_NarrowbandModel',
48+
objectMode: true, // send objects instead of text
49+
realtime: true, // slow down the results to match the audio playback speed
50+
format: false,
51+
play: true
52+
});
53+
54+
window.allResults = [];
55+
56+
stream.on('data', function(data) {
57+
console.log(data);
58+
allResults.push(data);
59+
});
60+
61+
stream.on('error', function(err) {
62+
console.log(err);
63+
});
64+
65+
document.querySelector('#stop').onclick = stream.stop.bind(stream);
66+
67+
}).catch(function(error) {
68+
console.log(error);
69+
});
70+
};
71+
72+
</script></code></pre>
73+
74+
</body>
75+
</html>

speech-to-text/recognize-file.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ module.exports = function recognizeFile(options) { // eslint-disable-line comple
103103
stream.pipe(new WritableElementStream(options));
104104
}
105105

106-
if(options.extractResults) {
106+
if (options.extractResults) {
107107
var stop = stream.stop.bind(stream);
108108
stream = stream.pipe(new ResultExtractor());
109109
stream.stop = stop;

speech-to-text/recognize-microphone.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ module.exports = function recognizeMicrophone(options) {
109109
stream.pipe(new WritableElementStream(options));
110110
}
111111

112-
if(options.extractResults) {
112+
if (options.extractResults) {
113113
stream = stream.pipe(new ResultExtractor());
114114
}
115115

speech-to-text/recognize-stream.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ var OPENING_MESSAGE_PARAMS_ALLOWED = [
3737
'max_alternatives',
3838
'word_alternatives_threshold',
3939
'profanity_filter',
40-
'smart_formatting'
40+
'smart_formatting',
41+
'speaker_labels'
4142
];
4243

4344
var QUERY_PARAMS_ALLOWED = [

0 commit comments

Comments
 (0)