'use strict';
var Readable = require('stream').Readable;
var util = require('util');
var defaults = require('defaults');

/**
 * Extracts audio from an `<audio>` or `<video>` element and provides it as a Node.js Readable stream
 *
 * @deprecated - the SDK no longer supports transcription from audio/video elements
 *
 * @param {HTMLMediaElement|string} element `<audio>` or `<video>` element or CSS selector
 * @param {Object} [options] options
 * @param {Number|null} [options.bufferSize=null] buffer size - Mozilla docs recommend leaving this unset for optimal performance
 * @param {Boolean} [options.muteSource=false] - if true, the audio will not be played back to the user
 * @param {Boolean} [options.autoPlay=true] - if true, playback begins as soon as enough data has buffered
 * @param {String} [options.crossOrigin="anonymous"] - crossOrigin attribute to set on the element; required for cross-domain audio
 * @param {Boolean} [options.objectMode=true] - emit AudioBuffers w/ the audio + a bit of metadata instead of Node.js Buffers with audio only
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement
 * @see https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
 *
 * @todo: add option for whether to keep or destroy the context
 * @todo: test what happens if source has multiple channels
 *
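 * @example
 * // hypothetical usage sketch - the element id and handler bodies here are
 * // illustrative assumptions, not part of the SDK:
 * var stream = new MediaElementAudioStream('#my-audio', { objectMode: false });
 * stream.on('format', function(format) {
 *   // e.g. { channels: 1, bitDepth: 32, sampleRate: 44100, signed: true, float: true }
 * });
 * stream.on('data', function(chunk) {
 *   // chunk is a Node.js Buffer of raw 32-bit float PCM samples
 * });
 * // calling stream.stop() pauses the element and ends the stream early
 *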
 * @constructor
 */
function MediaElementAudioStream(element, options) {

  options = defaults(options, {
    // "It is recommended for authors to not specify this buffer size and allow the implementation to pick a good
    // buffer size to balance between latency and audio quality."
    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // Possible values: null, 256, 512, 1024, 2048, 4096, 8192, 16384
    // however, webkitAudioContext (Safari) requires it to be set.
    // Note: test window.AudioContext here - the bare AudioContext identifier is shadowed by the
    // hoisted (and, at this point, still undefined) local var declared further down
    bufferSize: (typeof window.AudioContext !== "undefined" ? null : 4096),
    muteSource: false,
    autoPlay: true,
    crossOrigin: "anonymous", // required for cross-domain audio playback
    objectMode: true // true = emit AudioBuffers w/ audio + some metadata, false = emit Node.js Buffers (with binary audio data only)
  });

  // We only extract one channel's worth of audio, so a single input channel is enough
  // (handling multi-channel sources is an open @todo - see above)
  var inputChannels = 1;

  // we shouldn't need any output channels (going back to the browser - that's what the gain node is for), but chrome is buggy and won't give us any audio without one
  var outputChannels = 1;

  if (typeof element === 'string') {
    element = document.querySelector(element);
  }

  if (!element) {
    throw new Error('Watson Speech to Text MediaElementAudioStream: missing element');
  }

  Readable.call(this, options);

  var self = this;
  var recording = true;

  // There isn't much documentation on crossOrigin for <audio> elements, but it seems to be required for cross-domain usage (in addition to CORS headers on the server)
  element.crossOrigin = options.crossOrigin;

  /**
   * Convert and emit the raw audio data
   * @see https://developer.mozilla.org/en-US/docs/Web/API/ScriptProcessorNode/onaudioprocess
   * @param {AudioProcessingEvent} e https://developer.mozilla.org/en-US/docs/Web/API/AudioProcessingEvent
   */
  function processAudio(e) {
    // onaudioprocess can be called at least once after we've stopped
    if (recording) {
      // todo: interleave channels in binary mode
      // pass the Float32Array's underlying ArrayBuffer so that the Buffer wraps the raw sample bytes
      // (passing the Float32Array itself would truncate each sample to a single octet)
      self.push(options.objectMode ? e.inputBuffer : new Buffer(e.inputBuffer.getChannelData(0).buffer));
    }
  }

  var AudioContext = window.AudioContext || window.webkitAudioContext;
  // cache the context & source node on the element - a second MediaElementSourceNode
  // cannot be created for the same element later
  var context = element.context = element.context || new AudioContext();
  var audioInput = element.node = element.node || context.createMediaElementSource(element);
  var scriptProcessor = context.createScriptProcessor(options.bufferSize, inputChannels, outputChannels);

  scriptProcessor.onaudioprocess = processAudio;

  if (!options.muteSource) {
    // route the audio through a no-op gain node so the user still hears it
    var gain = context.createGain();
    audioInput.connect(gain);
    gain.connect(context.destination);
  }

  /**
   * Connects the script processor to begin extracting audio
   *
   * Delayed until the "playing" event to avoid processing the stream of silence received before the file begins playing
   */
  function connect() {
    audioInput.connect(scriptProcessor);
    // other half of the workaround for chrome bugs: the script processor produces no audio unless it's connected to a destination
    scriptProcessor.connect(context.destination);
    element.removeEventListener("playing", connect);
  }
  element.addEventListener("playing", connect);

  // https://developer.mozilla.org/en-US/docs/Web/Guide/Events/Media_events
  // https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/readyState
  function start() {
    element.play();
    element.removeEventListener("canplaythrough", start);
  }
  if (options.autoPlay) {
    // play immediately if we have enough data, otherwise wait for the canplaythrough event
    if (element.readyState === element.HAVE_ENOUGH_DATA) {
      element.play();
    } else {
      element.addEventListener("canplaythrough", start);
    }
  }

  function end() {
    recording = false;
    scriptProcessor.disconnect();
    audioInput.disconnect();
    // context.close(); // closing the context would prevent re-using the same audio element until the page is refreshed
    self.push(null);
    self.emit('close');
  }
  element.addEventListener("ended", end);

  this.stop = function() {
    element.pause();
    end();
  };

  // the "error" event carries no details itself; the actual MediaError is exposed on element.error
  element.addEventListener("error", function() {
    self.emit('error', element.error);
  });

  process.nextTick(function() {
    // this is more useful for binary mode than object mode, but it won't hurt either way
    self.emit('format', {
      channels: 1,
      bitDepth: 32,
      sampleRate: context.sampleRate,
      signed: true,
      float: true
    });
  });

}
util.inherits(MediaElementAudioStream, Readable);

MediaElementAudioStream.prototype._read = function(/* bytes */) {
  // no-op (back-pressure flow-control doesn't really work on sound)
};

/**
 * Converts a Buffer back into the raw Float32Array format that browsers use.
 * Note: this is just a new Float32Array view over the same underlying ArrayBuffer -
 * the actual audio data is not copied or changed here.
 *
 * @param {Buffer} chunk node-style buffer of audio data from a 'data' event or read() call
 * @return {Float32Array} raw 32-bit float view of the audio data
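 *
 * @example
 * // hypothetical sketch (variable names are illustrative): converting a binary-mode
 * // chunk to 16-bit signed PCM, e.g. for a WAV encoder
 * var floats = MediaElementAudioStream.toRaw(chunk);
 * var ints = new Int16Array(floats.length);
 * for (var i = 0; i < floats.length; i++) {
 *   // clamp to [-1, 1], then scale to the Int16 range
 *   ints[i] = Math.max(-1, Math.min(1, floats[i])) * 0x7FFF;
 * }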
 */
MediaElementAudioStream.toRaw = function toFloat32(chunk) {
  // respect the Buffer's byteOffset - a Buffer may be a view into a larger shared ArrayBuffer,
  // so a view over all of chunk.buffer could include unrelated bytes
  return new Float32Array(chunk.buffer, chunk.byteOffset, chunk.length / Float32Array.BYTES_PER_ELEMENT);
};

module.exports = MediaElementAudioStream;