Skip to content

Commit 7a31b82

Browse files
committed
renaming playFile to play for recognizeBlob and correcting documented default
also setting timing stream and format stream as defaults when appropriate
1 parent 3e44624 commit 7a31b82

12 files changed

+167
-62
lines changed

README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
IBM Watson Speech To Text Browser Client Library
22
================================================
33

4+
[![Build Status](https://travis-ci.org/watson-developer-cloud/speech-javascript-sdk.svg?branch=master)](https://travis-ci.org/watson-developer-cloud/speech-javascript-sdk)
5+
46
Allows you to easily add voice recognition to any web app with minimal code.
57

68
**Warning** This library is still early-stage and may see significant breaking changes.
@@ -102,6 +104,14 @@ For use with `.recognizeBlob({playFile: true})` - slows the results down to matc
102104
Inherits `.stop()` method and `result` event from the `RecognizeStream`.
103105

104106

107+
## Changelog
108+
109+
### v0.7
110+
* Changed playFile option of recognizeBlob to play to match docs
111+
* Added options.format to recognize* to pipe text through a FormatStream (default: true)
112+
* Added close and end events to TimingStream
113+
114+
105115
## todo
106116

107117
* Fix bugs around `.stop()`
@@ -116,5 +126,4 @@ Inherits `.stop()` method and `result` event from the `RecognizeStream`.
116126
* more tests in general
117127
* update node-sdk to use current version of this lib's RecognizeStream (and also provide the FormatStream + anything else that might be handy)
118128
* improve docs
119-
* check for a bug with the timing stream cutting off early
120-
129+
* automatically npm publish on passing tagged build

examples/public/audio-element-programmatic.html

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ <h2>Code for this demo:</h2>
3535
token: token,
3636
element: audioElement
3737
// muteSource: true // prevents sound from also playing locally
38-
})
39-
.pipe(new WatsonSpeech.SpeechToText.FormatStream()); // optional
38+
});
4039

4140
// each result gets its own <span> because watson will sometimes go back and change a word as it hears more context
4241
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);

examples/public/audio-element.html

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ <h2>Code for this demo:</h2>
3434
token: token,
3535
element: $('#audio-element')[0]
3636
// muteSource: true // prevents sound from also playing locally
37-
})
38-
.pipe(new WatsonSpeech.SpeechToText.FormatStream()); // optional
37+
});
3938

4039
// each result gets its own <span> because watson will sometimes go back and change a word as it hears more context
4140
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>File Promise Example - Watson Speech to Text</title>
6+
</head>
7+
<body>
8+
9+
<section>
10+
<h2>Transcribe File, Comparing <code>{realtime: true}</code> to <code>{realtime: false}</code></h2>
11+
<input type="file" id="audiofile"> <button id="button">Transcribe and Play</button> <button id="stop">Stop</button>
12+
<p><small><i>Supported types are wav, ogg/opus (not ogg/vorbis), and flac. However, most browsers do not support flac.</i></small></p>
13+
14+
<h2><code>realtime: false</code> Output:</h2>
15+
<div id="output">--</div>
16+
17+
<h2><code>realtime: true</code> Output:</h2>
18+
<p><small><i>When transcription results are available faster than real-time (such as when simultaneously uploading and playing a file on a high-bandwidth connection), the TimingStream slows the results down to real-time.</i></small></p>
19+
<div id="realtime-output">--</div>
20+
</section>
21+
22+
<script src="watson-speech.js"></script>
23+
<script src="http://code.jquery.com/jquery-2.2.0.min.js"></script>
24+
25+
<h2>Code for this demo:</h2>
26+
<pre><code><script style="display: block;">
27+
function renderStream(stream, $output) {
28+
$output.html('');
29+
30+
// each result (sentence) gets its own <span> because watson will sometimes go back and change a word as it hears more context
31+
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);
32+
33+
// a result is approximately equivalent to a sentence
34+
stream.on('result', function(result) {
35+
// update the text for the current sentence with the default alternative.
36+
// there may be multiple alternatives but this example app ignores all but the first.
37+
$curSentence.html(result.alternatives[0].transcript);
38+
if (result.final) {
39+
// if we have the final text for that sentence, start a new one
40+
$curSentence = $('<span/>').appendTo($output);
41+
}
42+
});
43+
44+
stream.on('error', function(err) {
45+
console.log(err);
46+
});
47+
48+
stream.on('playback-error', function(err) {
49+
console.log(err);
50+
});
51+
}
52+
53+
54+
$(function() {
55+
56+
var stream;
57+
58+
$('#button').click(function () {
59+
$.get('/token').then(function (token) {
60+
61+
stream = WatsonSpeech.SpeechToText.recognizeBlob({
62+
token: token,
63+
data: $('#audiofile')[0].files[0],
64+
play: true,
65+
max_alternatives: 1, // default is 3, but only the first one includes word timing data
66+
realtime: false // defaults to true, but we're going to turn it off and then manually do it in a moment to show the difference
67+
});
68+
69+
renderStream(stream, $('#output'));
70+
71+
// now do what the realtime option would have done: pipe through a TimingStream
72+
var realtimeStream = stream.pipe(new WatsonSpeech.SpeechToText.TimingStream());
73+
74+
renderStream(realtimeStream, $('#realtime-output'));
75+
76+
});
77+
});
78+
79+
$('#stop').click(function() {
80+
if (stream) {
81+
stream.stop();
82+
}
83+
});
84+
});
85+
</script></code></pre>
86+
</body>
87+
</html>

examples/public/file-promise.html

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ <h2>Code for this demo:</h2>
3636
timestamps: false,
3737
max_alternatives: 1
3838
})
39-
.pipe(new WatsonSpeech.SpeechToText.FormatStream()) // optional
4039
.promise()
4140
.then(function(text) {
4241
$output.text(text);

examples/public/file-streaming.html

Lines changed: 30 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -13,65 +13,50 @@ <h2>Transcribe File, Streaming</h2>
1313

1414
<h2>Output:</h2>
1515
<div id="output">--</div>
16-
17-
<h2>TimingStream Output:</h2>
18-
<p><small><i>When transcription results are available faster than real-time (such as when simultaneously uploading and playing a file on a high-bandwidth connection), the TimingStream slows them down results to real-time.</i></small></p>
19-
<div id="realtime-output">--</div>
2016
</section>
2117

2218
<script src="watson-speech.js"></script>
2319
<script src="http://code.jquery.com/jquery-2.2.0.min.js"></script>
2420

2521
<h2>Code for this demo:</h2>
2622
<pre><code><script style="display: block;">
27-
function renderStream(stream, $output) {
28-
$output.html('');
29-
30-
// each result (sentence) gets it's own <span> because watson will sometimes go back and change a word as it hears more context
31-
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);
32-
33-
// a result is approximately equivalent to a sentence
34-
stream.on('result', function(result) {
35-
// update the text for the current sentence with the default alternative.
36-
// there may be multiple alternatives but this example app ignores all but the first.
37-
$curSentence.html(result.alternatives[0].transcript);
38-
if (result.final) {
39-
// if we have the final text for that sentence, start a new one
40-
$curSentence = $('<span/>').appendTo($output);
41-
}
42-
});
43-
44-
stream.on('error', function(err) {
45-
console.log(err);
46-
});
47-
48-
stream.on('playback-error', function(err) {
49-
console.log(err);
50-
});
51-
}
52-
53-
5423
$(function() {
5524

56-
var stream;
25+
var stream, $output = $('#output');
5726

5827
$('#button').click(function () {
28+
$output.html('');
29+
5930
$.get('/token').then(function (token) {
6031

6132
stream = WatsonSpeech.SpeechToText.recognizeBlob({
6233
token: token,
6334
data: $('#audiofile')[0].files[0],
64-
playFile: true,
65-
max_alternatives: 1 // default is 3, but only the first one includes word timing data
66-
})
67-
.pipe(new WatsonSpeech.SpeechToText.FormatStream());// optional
68-
69-
renderStream(stream, $('#output'));
70-
71-
// optional, slows down results to real-time (only useful for recognizeBlob() with playFile: true)
72-
var realtimeStream = stream.pipe(new WatsonSpeech.SpeechToText.TimingStream());
73-
74-
renderStream(realtimeStream, $('#realtime-output'));
35+
play: true // play the audio out loud
36+
});
37+
38+
// each result (sentence) gets its own <span> because watson will sometimes go back and change a word as it hears more context
39+
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);
40+
41+
// a result is approximately equivalent to a sentence
42+
stream.on('result', function(result) {
43+
// update the text for the current sentence with the default alternative.
44+
// there may be multiple alternatives but this example app ignores all but the first.
45+
$curSentence.html(result.alternatives[0].transcript);
46+
if (result.final) {
47+
// if we have the final text for that sentence, start a new one
48+
$curSentence = $('<span/>').appendTo($output);
49+
}
50+
});
51+
52+
stream.on('error', function(err) {
53+
console.log(err);
54+
});
55+
56+
// handle file playback errors
57+
stream.on('playback-error', function(err) {
58+
console.log(err);
59+
});
7560

7661
});
7762
});
@@ -85,3 +70,5 @@ <h2>Code for this demo:</h2>
8570
</script></code></pre>
8671
</body>
8772
</html>
73+
74+

examples/public/index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
<li><a href="microphone-streaming.html">Transcribe from Microphone, Streaming</a></li>
1010
<li><a href="microphone-streaming-auto-stop.html">Transcribe from Microphone, Streaming, automatically stop at first pause</a></li>
1111
<li><a href="file-streaming.html">Transcribe from file, Streaming</a></li>
12+
<li><a href="blob-realtime-vs-no-realtime.html">Transcribe from file, Comparing <code>{realtime: true}</code> to <code>{realtime: false}</code></a></li>
1213
<li><a href="file-promise.html">Transcribe from file, Promise</a></li>
1314
<li><a href="audio-element.html">Transcribe from HTML5 &lt;audio&gt; element, Streaming</a></li>
1415
<li><a href="audio-element-programmatic.html">Transcribe from <code>new Audio()</code>, Streaming</a></li>

examples/public/microphone-streaming-auto-stop.html

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ <h2>Code for this demo:</h2>
3030
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
3131
token: token,
3232
continuous: false // false = automatically stop transcription the first time a pause is detected
33-
})
34-
.pipe(new WatsonSpeech.SpeechToText.FormatStream()); // optional
33+
});
3534

3635
// each result (sentence) gets its own <span> because Watson will sometimes go back and change a word as it hears more context
3736
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);

examples/public/microphone-streaming.html

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,7 @@ <h2>Code for this demo:</h2>
3030
$.get('/token').then(function (token) {
3131
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
3232
token: token
33-
})
34-
.pipe(new WatsonSpeech.SpeechToText.FormatStream()); // optional
33+
});
3534

3635
// each result (sentence) gets its own <span> because Watson will sometimes go back and change a word as it hears more context
3736
var $curSentence = $('<span>&nbsp;</span>').appendTo($output);

speech-to-text/recognize-blob.js

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
var BlobStream = require('readable-blob-stream');
1919
var RecognizeStream = require('./recognize-stream.js');
2020
var FilePlayer = require('./file-player.js');
21-
21+
var FormatStream = require('./format-stream.js');
22+
var TimingStream = require('./timing-stream.js');
2223

2324
/**
2425
* Create and return a RecognizeStream from a File or Blob
@@ -27,7 +28,9 @@ var FilePlayer = require('./file-player.js');
2728
* @param {Object} options - Also passed to {MediaElementAudioStream} and to {RecognizeStream}
2829
* @param {String} options.token - Auth Token - see https://github.com/watson-developer-cloud/node-sdk#authorization
2930
* @param {Blob|File} options.data - the raw audio data as a Blob or File instance
30-
* @param {Boolean} [options.play=true] - If a file is set, play it locally as it's being uploaded
31+
* @param {Boolean} [options.play=false] - If a file is set, play it locally as it's being uploaded
32+
* @param {Boolean} [options.format=true] - pipe the text through a {FormatStream} which performs light formatting
33+
* @param {Boolean} [options.realtime=options.play] - pipe the text through a {TimingStream} which slows the output down to real-time to match the audio playback.
3134
*
3235
* @returns {RecognizeStream}
3336
*/
@@ -36,17 +39,26 @@ module.exports = function recognizeBlob(options) {
3639
throw new Error("WatsonSpeechToText: missing required parameter: opts.token");
3740
}
3841

39-
var recognizeStream = new RecognizeStream(options);
42+
var recognizeStream = new RecognizeStream(options);
43+
var stream = new BlobStream(options.data).pipe(recognizeStream);
44+
45+
if (options.format !== false) {
46+
stream = stream.pipe(new FormatStream(options));
47+
}
48+
if (options.realtime || typeof options.realtime === 'undefined' && options.play) {
49+
stream = stream.pipe(new TimingStream(options));
50+
start = Date.now();
51+
}
4052

41-
if (options.playFile) {
53+
if (options.play) {
4254
FilePlayer.playFile(options.data).then(function (player) {
4355
recognizeStream.on('stop', player.stop.bind(player));
4456
}).catch(function (err) {
4557
recognizeStream.emit('playback-error', err);
4658
});
4759
}
4860

49-
return new BlobStream(options.data).pipe(recognizeStream);
61+
return stream;
5062
};
5163

5264

0 commit comments

Comments
 (0)