Skip to content

Commit d93a8e2

Browse files
committed
new examples: browserify, audio/video element
audio/video element includes deprecated code from v0.14 with a clear notice that it is not supported
1 parent ea9b5f6 commit d93a8e2

File tree

9 files changed

+391
-4
lines changed

9 files changed

+391
-4
lines changed

examples/package.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,15 @@
66
"start": "node server.js"
77
},
88
"dependencies": {
9+
"defaults": "^1.0.3",
910
"dotenv": "^2.0.0",
1011
"express": "^4.13.3",
12+
"express-browserify": "^1.0.2",
13+
"object.assign": "^4.0.4",
1114
"vcap_services": "^0.1.7",
1215
"watson-developer-cloud": "^1.2.0",
13-
"watson-speech": "*"
14-
}
16+
"watson-speech": "*",
17+
"whatwg-fetch": "^1.0.0"
18+
},
19+
"license": "Apache-2.0"
1520
}

examples/server.js

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,27 @@
1616

1717
'use strict';
1818

19-
var express = require('express'),
20-
app = express();
19+
var express = require('express');
20+
var app = express();
21+
var expressBrowserify = require('express-browserify');
2122

2223
// allows environment properties to be set in a file named .env
2324
require('dotenv').load({silent: true});
2425

2526
app.use(express.static(__dirname + '/static'));
2627

28+
// set up express-browserify to serve bundles for examples
29+
var isDev = app.get('env') === 'development';
30+
app.get('/bundle.js', expressBrowserify('static/browserify-app.js', {
31+
watch: isDev,
32+
debug: isDev
33+
}));
34+
app.get('/audio-video-deprecated/bundle.js', expressBrowserify('static/audio-video-deprecated/audio-video-app.js', {
35+
watch: isDev,
36+
debug: isDev
37+
}));
38+
39+
2740
// token endpoints
2841
// **Warning**: these endpoints should be guarded with additional authentication & authorization for production use
2942
app.use('/api/speech-to-text/', require('./stt-token.js'));
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// recognizeElement example, now deprecated
// requires browserify

// global window.fetch polyfill for IE/Edge & older Chrome/Firefox
require('whatwg-fetch');

// keep the bundle slim by only requiring the necessary modules
var recognizeElement = require('./recognize-element');

// fetch an auth token from the example server
function fetchToken() {
  return fetch('/api/speech-to-text/token').then(function(response) {
    return response.text();
  });
}

// play the media element and stream its transcription into #output
function startTranscription(token) {
  var stream = recognizeElement({
    // muteSource: true, // prevents sound from also playing locally
    token: token,
    element: '#audio-element', // may be a CSS selector or a DOM Element
    outputElement: '#output' // ditto
  });

  stream.on('error', function(err) {
    console.log(err);
  });
}

document.querySelector('#button').onclick = function() {
  fetchToken()
    .then(startTranscription)
    .catch(function(error) {
      console.log(error);
    });
};

// note: you may also create audio/video elements programmatically via new Audio() or
// document.createElement('video'); and then set the .src
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Deprecated: Audio Element | IBM Watson Speech JavaScript SDK Example</title>
</head>
<body>

  <!-- This page demonstrates a feature that was removed from the SDK; it is kept only as a reference. -->
  <section>
    <h2>Deprecated: Transcribe &lt;audio&gt; Element, Streaming</h2>
    <p>Note: this was a feature of the library, but was removed due to quality issues.
    The code is preserved in this example but is not supported.</p>

    <audio id="audio-element" src="../audio.wav" controls>
      Your browser does not support the <code>audio</code> element.
    </audio>

    <button id="button">Play and Transcribe</button>

    <p><small><i>&lt;video&gt; elements should behave the same as &lt;audio&gt; elements. The browser must be able to decode and play the file in either case.</i></small></p>

    <h2>Output:</h2>
    <div id="output">--</div>
  </section>

  <!-- bundle.js is generated on the fly by express-browserify from audio-video-app.js (see examples/server.js) -->
  <script src="bundle.js"></script>

  <h2>Code for this demo:</h2>

  <p>Note: this code is compiled into <a href="bundle.js"><code>bundle.js</code></a> by <code>express-browserify</code>.
  This requires Node.js.</p>

  <!-- embeds the example source as plain text so visitors can read it inline -->
  <pre><code><embed type="text/plain" src="audio-video-app.js" width="100%" height="600"></embed></code></pre>

</body>
</html>
38+
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
'use strict';
2+
var Readable = require('stream').Readable;
3+
var util = require('util');
4+
var defaults = require('defaults');
5+
6+
/**
 * Extracts audio from an `<audio>` or `<video>` element and provides it as a Node.js Readable stream
 *
 * @deprecated - the SDK no longer supports transcription from audio/video elements
 *
 * @param {HTMLMediaElement|string} element `<audio>` or `<video>` element or CSS selector
 * @param {Object} [options] options
 * @param {Number|null} [options.bufferSize=null] buffer size - Mozilla docs recommend leaving this unset for optimal performance
 * @param {Boolean} [options.muteSource=false] - If true, the audio will not be sent back to the source
 * @param {Boolean} [options.autoPlay=true] - If true, playback begins automatically (immediately or on canplaythrough)
 * @param {String} [options.crossOrigin="anonymous"] - crossOrigin value applied to the element; required for cross-domain audio playback
 * @param {Boolean} [options.objectMode=true] - emit AudioBuffers w/ the audio + a bit of metadata instead of Node.js Buffers with audio only
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement
 * @see https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
 *
 * @todo: add option for whether to keep or destroy the context
 * @todo: test what happens if source has multiple channels
 *
 * @constructor
 */
function MediaElementAudioStream(element, options) {

  options = defaults(options, {
    // "It is recommended for authors to not specify this buffer size and allow the implementation to pick a good
    // buffer size to balance between latency and audio quality."
    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // Possible values: null, 256, 512, 1024, 2048, 4096, 8192, 16384
    // however, webkitAudioContext (safari) requires it to be set.
    // Bug fix: this must reference the *global* AudioContext explicitly via `window`.
    // Previously the function declared `var AudioContext = ...` further down; hoisting made
    // this check always see `undefined`, forcing bufferSize to 4096 even in browsers with
    // a native AudioContext.
    bufferSize: (typeof window.AudioContext != "undefined" ? null : 4096),
    muteSource: false,
    autoPlay: true,
    crossOrigin: "anonymous", // required for cross-domain audio playback
    objectMode: true // true = emit AudioBuffers w/ audio + some metadata, false = emit node.js Buffers (with binary data only)
  });

  // We can only emit one channel's worth of audio, so only one input. (Who has multiple microphones anyways?)
  var inputChannels = 1;

  // we shouldn't need any output channels (going back to the browser - that's what the gain node is for),
  // but chrome is buggy and won't give us any audio without one
  var outputChannels = 1;

  if (typeof element == 'string') {
    element = document.querySelector(element);
  }

  if (!element) {
    throw new Error('Watson Speech to Text MediaElementAudioStream: missing element');
  }

  Readable.call(this, options);

  var self = this;
  var recording = true;

  // I can't find much documentation for this for <audio> elements, but it seems to be required
  // for cross-domain usage (in addition to CORS headers)
  element.crossOrigin = options.crossOrigin;

  /**
   * Convert and emit the raw audio data
   * @see https://developer.mozilla.org/en-US/docs/Web/API/ScriptProcessorNode/onaudioprocess
   * @param {AudioProcessingEvent} e https://developer.mozilla.org/en-US/docs/Web/API/AudioProcessingEvent
   */
  function processAudio(e) {
    // onaudioprocess can be called at least once after we've stopped
    if (recording) {
      if (options.objectMode) {
        self.push(e.inputBuffer);
      } else {
        // Bug fix: wrap the Float32Array's underlying *bytes*. `new Buffer(float32Array)`
        // treats the typed array as an array of octets, truncating every sample and
        // destroying the audio; the emitted 'format' event (and toRaw) promise raw
        // 32-bit float data. The samples are copied first so the Buffer does not alias
        // memory that the audio implementation may reuse for the next callback.
        // todo: interleave channels in binary mode
        var samples = new Float32Array(e.inputBuffer.getChannelData(0));
        self.push(new Buffer(samples.buffer));
      }
    }
  }

  // Renamed from `AudioContext` to `Context` so the local var no longer shadows the
  // global checked in the bufferSize default above.
  var Context = window.AudioContext || window.webkitAudioContext;
  // cache the source node & context on the element since it's not possible to recreate them later
  var context = element.context = element.context || new Context();
  var audioInput = element.node = element.node || context.createMediaElementSource(element);
  var scriptProcessor = context.createScriptProcessor(options.bufferSize, inputChannels, outputChannels);

  scriptProcessor.onaudioprocess = processAudio;

  // unless muted, route the source audio back to the speakers through a no-op gain node
  if (!options.muteSource) {
    var gain = context.createGain();
    audioInput.connect(gain);
    gain.connect(context.destination);
  }

  /**
   * Setup script processor to extract audio and also re-connect it via a no-op gain node if desired
   *
   * Delayed to avoid processing the stream of silence received before the file begins playing
   */
  function connect() {
    audioInput.connect(scriptProcessor);
    // other half of workaround for chrome bugs
    scriptProcessor.connect(context.destination);
    element.removeEventListener("playing", connect);
  }
  element.addEventListener("playing", connect);

  // https://developer.mozilla.org/en-US/docs/Web/Guide/Events/Media_events
  // https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/readyState
  function start() {
    element.play();
    element.removeEventListener("canplaythrough", start);
  }
  if (options.autoPlay) {
    // play immediately if we have enough data, otherwise wait for the canplaythrough event
    if (element.readyState === element.HAVE_ENOUGH_DATA) {
      element.play();
    } else {
      element.addEventListener("canplaythrough", start);
    }
  }

  // stop capturing, end the Node.js stream, and emit 'close'
  function end() {
    recording = false;
    scriptProcessor.disconnect();
    audioInput.disconnect();
    //context.close(); // this prevents us from re-using the same audio element until the page is refreshed
    self.push(null);
    self.emit('close');
  }
  element.addEventListener("ended", end);

  /**
   * Pauses the element and ends the stream.
   */
  this.stop = function() {
    element.pause();
    end();
  };

  // forward media errors as stream 'error' events
  element.addEventListener("error", this.emit.bind(this, 'error'));

  // describe the audio format once listeners have had a chance to attach
  process.nextTick(function() {
    // this is more useful for binary mode than object mode, but it won't hurt either way
    self.emit('format', {
      channels: 1,
      bitDepth: 32,
      sampleRate: context.sampleRate,
      signed: true,
      float: true
    });
  });

}
147+
// Inherit the Readable stream interface (push/pipe/events) for MediaElementAudioStream.
util.inherits(MediaElementAudioStream, Readable);

// Required Readable implementation; intentionally empty - data is pushed from the
// onaudioprocess callback as the element plays rather than being pulled on demand.
MediaElementAudioStream.prototype._read = function(/* bytes */) {
  // no-op, (back-pressure flow-control doesn't really work on sound)
};
152+
153+
/**
 * Converts a Buffer back into the raw Float32Array format that browsers use.
 * Note: this is just a new typed-array view of the same underlying memory -
 * the actual audio data is not copied or changed here.
 *
 * @param {Buffer} chunk node-style buffer of audio data from a 'data' event or read() call
 * @return {Float32Array} raw 32-bit float data view of audio data
 */
MediaElementAudioStream.toRaw = function toFloat32(chunk) {
  // Bug fix: a Buffer is often a slice of a larger shared ArrayBuffer (Node pools small
  // allocations), so the view must be limited to the chunk's own byteOffset/byteLength
  // instead of spanning the entire underlying buffer.
  return new Float32Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / Float32Array.BYTES_PER_ELEMENT);
};

module.exports = MediaElementAudioStream;
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Copyright 2015 IBM Corp. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
var MediaElementAudioStream = require('./media-element-audio-stream');
19+
var L16 = require('watson-speech/speech-to-text/webaudio-l16-stream');
20+
var RecognizeStream = require('watson-speech/speech-to-text/recognize-stream.js');
21+
var FormatStream = require('watson-speech/speech-to-text/format-stream.js');
22+
var assign = require('object.assign/polyfill')();
23+
var WritableElementStream = require('watson-speech/speech-to-text/writable-element-stream');
24+
25+
/**
26+
* Recognize audio from a <audio> or <video> element
27+
*
28+
* @deprecated - This method has several quality issues, and so is no longer supported.
29+
*
30+
* @param {Object} options - Also passed to {MediaElementAudioStream} and to {RecognizeStream}
31+
* @param {String} options.token - Auth Token - see https://github.com/watson-developer-cloud/node-sdk#authorization
32+
* @param {MediaElement} options.element - the <video> or <audio> element to play
33+
* @param {Boolena} [options.format=true] - pipe the text through a {FormatStream} which performs light formatting
34+
* @param {String|DOMElement} [options.outputElement] pipe the text to a WriteableElementStream targeting the specified element. Also defaults objectMode to true to enable interim results.
35+
*
36+
* @returns {RecognizeStream|FormatStream}
37+
*/
38+
module.exports = function recognizeElement(options) {
39+
if (!options || !options.token) {
40+
throw new Error("WatsonSpeechToText: missing required parameter: opts.token");
41+
}
42+
43+
// the WritableElementStream works best in objectMode
44+
if (options.outputElement && options.objectMode !== false) {
45+
options.objectMode = true;
46+
}
47+
48+
49+
// we don't want the readable stream to have objectMode on the input even if we're setting it for the output
50+
var rsOpts = assign({}, options);
51+
rsOpts.readableObjectMode = options.objectMode;
52+
rsOpts['content-type'] = 'audio/l16;rate=16000';
53+
delete rsOpts.objectMode;
54+
55+
var recognizeStream = new RecognizeStream(rsOpts);
56+
57+
var sourceStream = new MediaElementAudioStream(options.element , {
58+
objectMode: true,
59+
bufferSize: options.bufferSize,
60+
muteSource: options.muteSource,
61+
autoPlay: options.autoPlay !== false // default to true if it's undefined
62+
});
63+
64+
var stream = sourceStream
65+
.pipe(new L16({writableObjectMode: true}))
66+
.pipe(recognizeStream);
67+
68+
if (options.format !== false) {
69+
stream = stream.pipe(new FormatStream(options));
70+
stream.stop = recognizeStream.stop.bind(recognizeStream);
71+
}
72+
73+
recognizeStream.on('stop', sourceStream.stop.bind(sourceStream));
74+
75+
if (options.outputElement) {
76+
stream.pipe(new WritableElementStream(options))
77+
}
78+
79+
return stream;
80+
};

examples/static/browserify-app.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Browserify bundling example

// global window.fetch polyfill for IE/Edge & older Chrome/Firefox
require('whatwg-fetch');

// keep the bundle slim by only requiring the necessary modules
var recognizeMicrophone = require('watson-speech/speech-to-text/recognize-microphone');

// fetch an auth token from the example server
function fetchToken() {
  return fetch('/api/speech-to-text/token').then(function(response) {
    return response.text();
  });
}

// start streaming from the microphone and render results into #output
function startTranscription(token) {
  var stream = recognizeMicrophone({
    token: token,
    continuous: false, // false = automatically stop transcription the first time a pause is detected
    outputElement: '#output' // CSS selector or DOM Element
  });

  stream.on('error', function(err) {
    console.log(err);
  });
}

document.querySelector('#button').onclick = function() {
  fetchToken()
    .then(startTranscription)
    .catch(function(error) {
      console.log(error);
    });
};

0 commit comments

Comments
 (0)