Skip to content

Commit d93a8e2

Browse files
committed
new examples: browserify, audio/video element
audio/video element includes deprecated code from v0.14 with a clear notice that it is not supported
1 parent ea9b5f6 commit d93a8e2

File tree

9 files changed

+391
-4
lines changed

9 files changed

+391
-4
lines changed

examples/package.json

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,15 @@
66
"start": "node server.js"
77
},
88
"dependencies": {
9+
"defaults": "^1.0.3",
910
"dotenv": "^2.0.0",
1011
"express": "^4.13.3",
12+
"express-browserify": "^1.0.2",
13+
"object.assign": "^4.0.4",
1114
"vcap_services": "^0.1.7",
1215
"watson-developer-cloud": "^1.2.0",
13-
"watson-speech": "*"
14-
}
16+
"watson-speech": "*",
17+
"whatwg-fetch": "^1.0.0"
18+
},
19+
"license": "Apache-2.0"
1520
}

examples/server.js

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,27 @@
1616

1717
'use strict';
1818

19-
var express = require('express'),
20-
app = express();
19+
var express = require('express');
20+
var app = express();
21+
var expressBrowserify = require('express-browserify');
2122

2223
// allows environment properties to be set in a file named .env
2324
require('dotenv').load({silent: true});
2425

2526
app.use(express.static(__dirname + '/static'));
2627

28+
// set up express-browserify to serve bundles for examples
29+
var isDev = app.get('env') === 'development';
30+
app.get('/bundle.js', expressBrowserify('static/browserify-app.js', {
31+
watch: isDev,
32+
debug: isDev
33+
}));
34+
app.get('/audio-video-deprecated/bundle.js', expressBrowserify('static/audio-video-deprecated/audio-video-app.js', {
35+
watch: isDev,
36+
debug: isDev
37+
}));
38+
39+
2740
// token endpoints
2841
// **Warning**: these endpoints should be guarded with additional authentication & authorization for production use
2942
app.use('/api/speech-to-text/', require('./stt-token.js'));
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// recognizeElement example, now deprecated
// requires browserify

// global window.fetch polyfill for IE/Edge & older Chrome/Firefox
require('whatwg-fetch');

// keep the bundle slim by only requiring the necessary modules
var recognizeElement = require('./recognize-element');

// fetch an auth token from the example server
function fetchToken() {
  return fetch('/api/speech-to-text/token').then(function(response) {
    return response.text();
  });
}

// play the media element and stream its transcription into #output
function startTranscription(token) {
  var stream = recognizeElement({
    // muteSource: true, // prevents sound from also playing locally
    token: token,
    element: '#audio-element', // may be a CSS selector or a DOM Element
    outputElement: '#output' // ditto
  });

  stream.on('error', function(err) {
    console.log(err);
  });
}

document.querySelector('#button').onclick = function() {
  fetchToken()
    .then(startTranscription)
    .catch(function(error) {
      console.log(error);
    });
};

// note: you may also create audio/video elements programmatically via new Audio() or
// document.createElement('video'); and then set the .src
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Deprecated: Audio Element | IBM Watson Speech JavaScript SDK Example</title>
</head>
<body>

  <!-- This page demonstrates a feature that was removed from the SDK; it is kept only as a reference. -->
  <section>
    <h2>Deprecated: Transcribe &lt;audio&gt; Element, Streaming</h2>
    <p>Note: this was a feature of the library, but was removed due to quality issues.
    The code is preserved in this example but is not supported.</p>

    <audio id="audio-element" src="../audio.wav" controls>
      Your browser does not support the <code>audio</code> element.
    </audio>

    <button id="button">Play and Transcribe</button>

    <p><small><i>&lt;video&gt; elements should behave the same as &lt;audio&gt; elements. The browser must be able to decode and play the file in either case.</i></small></p>

    <h2>Output:</h2>
    <div id="output">--</div>
  </section>

  <!-- bundle.js is generated on the fly by express-browserify from audio-video-app.js (see examples/server.js) -->
  <script src="bundle.js"></script>

  <h2>Code for this demo:</h2>

  <p>Note: this code is compiled into <a href="bundle.js"><code>bundle.js</code></a> by <code>express-browserify</code>.
  This requires Node.js.</p>

  <!-- embeds the example source as plain text so visitors can read it inline -->
  <pre><code><embed type="text/plain" src="audio-video-app.js" width="100%" height="600"></embed></code></pre>

</body>
</html>
38+
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
'use strict';
2+
var Readable = require('stream').Readable;
3+
var util = require('util');
4+
var defaults = require('defaults');
5+
6+
/**
 * Extracts audio from an `<audio>` or `<video>` element and provides it as a Node.js Readable stream
 *
 * @deprecated - the SDK no longer supports transcription from audio/video elements
 *
 * @param {HTMLMediaElement|string} element `<audio>` or `<video>` element or CSS selector
 * @param {Object} [options] options
 * @param {Number|null} [options.bufferSize=null] buffer size - Mozilla docs recommend leaving this unset for optimal performance
 * @param {Boolean} [options.muteSource=false] - If true, the audio will not be sent back to the source
 * @param {Boolean} [options.autoPlay=true] - If true, playback begins automatically (immediately or on canplaythrough)
 * @param {String} [options.crossOrigin="anonymous"] - crossOrigin value applied to the element; required for cross-domain audio playback
 * @param {Boolean} [options.objectMode=true] - emit AudioBuffers w/ the audio + a bit of metadata instead of Node.js Buffers with audio only
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement
 * @see https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
 *
 * @todo: add option for whether to keep or destroy the context
 * @todo: test what happens if source has multiple channels
 *
 * @constructor
 */
function MediaElementAudioStream(element, options) {

  options = defaults(options, {
    // "It is recommended for authors to not specify this buffer size and allow the implementation to pick a good
    // buffer size to balance between latency and audio quality."
    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // Possible values: null, 256, 512, 1024, 2048, 4096, 8192, 16384
    // however, webkitAudioContext (safari) requires it to be set.
    // Bug fix: this must reference the *global* AudioContext explicitly via `window`.
    // Previously the function declared `var AudioContext = ...` further down; hoisting made
    // this check always see `undefined`, forcing bufferSize to 4096 even in browsers with
    // a native AudioContext.
    bufferSize: (typeof window.AudioContext != "undefined" ? null : 4096),
    muteSource: false,
    autoPlay: true,
    crossOrigin: "anonymous", // required for cross-domain audio playback
    objectMode: true // true = emit AudioBuffers w/ audio + some metadata, false = emit node.js Buffers (with binary data only)
  });

  // We can only emit one channel's worth of audio, so only one input. (Who has multiple microphones anyways?)
  var inputChannels = 1;

  // we shouldn't need any output channels (going back to the browser - that's what the gain node is for),
  // but chrome is buggy and won't give us any audio without one
  var outputChannels = 1;

  if (typeof element == 'string') {
    element = document.querySelector(element);
  }

  if (!element) {
    throw new Error('Watson Speech to Text MediaElementAudioStream: missing element');
  }

  Readable.call(this, options);

  var self = this;
  var recording = true;

  // I can't find much documentation for this for <audio> elements, but it seems to be required
  // for cross-domain usage (in addition to CORS headers)
  element.crossOrigin = options.crossOrigin;

  /**
   * Convert and emit the raw audio data
   * @see https://developer.mozilla.org/en-US/docs/Web/API/ScriptProcessorNode/onaudioprocess
   * @param {AudioProcessingEvent} e https://developer.mozilla.org/en-US/docs/Web/API/AudioProcessingEvent
   */
  function processAudio(e) {
    // onaudioprocess can be called at least once after we've stopped
    if (recording) {
      if (options.objectMode) {
        self.push(e.inputBuffer);
      } else {
        // Bug fix: wrap the Float32Array's underlying *bytes*. `new Buffer(float32Array)`
        // treats the typed array as an array of octets, truncating every sample and
        // destroying the audio; the emitted 'format' event (and toRaw) promise raw
        // 32-bit float data. The samples are copied first so the Buffer does not alias
        // memory that the audio implementation may reuse for the next callback.
        // todo: interleave channels in binary mode
        var samples = new Float32Array(e.inputBuffer.getChannelData(0));
        self.push(new Buffer(samples.buffer));
      }
    }
  }

  // Renamed from `AudioContext` to `Context` so the local var no longer shadows the
  // global checked in the bufferSize default above.
  var Context = window.AudioContext || window.webkitAudioContext;
  // cache the source node & context on the element since it's not possible to recreate them later
  var context = element.context = element.context || new Context();
  var audioInput = element.node = element.node || context.createMediaElementSource(element);
  var scriptProcessor = context.createScriptProcessor(options.bufferSize, inputChannels, outputChannels);

  scriptProcessor.onaudioprocess = processAudio;

  // unless muted, route the source audio back to the speakers through a no-op gain node
  if (!options.muteSource) {
    var gain = context.createGain();
    audioInput.connect(gain);
    gain.connect(context.destination);
  }

  /**
   * Setup script processor to extract audio and also re-connect it via a no-op gain node if desired
   *
   * Delayed to avoid processing the stream of silence received before the file begins playing
   */
  function connect() {
    audioInput.connect(scriptProcessor);
    // other half of workaround for chrome bugs
    scriptProcessor.connect(context.destination);
    element.removeEventListener("playing", connect);
  }
  element.addEventListener("playing", connect);

  // https://developer.mozilla.org/en-US/docs/Web/Guide/Events/Media_events
  // https://developer.mozilla.org/en-US/docs/Web/API/HTMLMediaElement/readyState
  function start() {
    element.play();
    element.removeEventListener("canplaythrough", start);
  }
  if (options.autoPlay) {
    // play immediately if we have enough data, otherwise wait for the canplaythrough event
    if (element.readyState === element.HAVE_ENOUGH_DATA) {
      element.play();
    } else {
      element.addEventListener("canplaythrough", start);
    }
  }

  // stop capturing, end the Node.js stream, and emit 'close'
  function end() {
    recording = false;
    scriptProcessor.disconnect();
    audioInput.disconnect();
    //context.close(); // this prevents us from re-using the same audio element until the page is refreshed
    self.push(null);
    self.emit('close');
  }
  element.addEventListener("ended", end);

  /**
   * Pauses the element and ends the stream.
   */
  this.stop = function() {
    element.pause();
    end();
  };

  // forward media errors as stream 'error' events
  element.addEventListener("error", this.emit.bind(this, 'error'));

  // describe the audio format once listeners have had a chance to attach
  process.nextTick(function() {
    // this is more useful for binary mode than object mode, but it won't hurt either way
    self.emit('format', {
      channels: 1,
      bitDepth: 32,
      sampleRate: context.sampleRate,
      signed: true,
      float: true
    });
  });

}
147+
// Inherit the Readable stream interface (push/pipe/events) for MediaElementAudioStream.
util.inherits(MediaElementAudioStream, Readable);

// Required Readable implementation; intentionally empty - data is pushed from the
// onaudioprocess callback as the element plays rather than being pulled on demand.
MediaElementAudioStream.prototype._read = function(/* bytes */) {
  // no-op, (back-pressure flow-control doesn't really work on sound)
};
152+
153+
/**
 * Converts a Buffer back into the raw Float32Array format that browsers use.
 * Note: this is just a new typed-array view of the same underlying memory -
 * the actual audio data is not copied or changed here.
 *
 * @param {Buffer} chunk node-style buffer of audio data from a 'data' event or read() call
 * @return {Float32Array} raw 32-bit float data view of audio data
 */
MediaElementAudioStream.toRaw = function toFloat32(chunk) {
  // Bug fix: a Buffer is often a slice of a larger shared ArrayBuffer (Node pools small
  // allocations), so the view must be limited to the chunk's own byteOffset/byteLength
  // instead of spanning the entire underlying buffer.
  return new Float32Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / Float32Array.BYTES_PER_ELEMENT);
};

module.exports = MediaElementAudioStream;
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/**
2+
* Copyright 2015 IBM Corp. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
var MediaElementAudioStream = require('./media-element-audio-stream');
19+
var L16 = require('watson-speech/speech-to-text/webaudio-l16-stream');
20+
var RecognizeStream = require('watson-speech/speech-to-text/recognize-stream.js');
21+
var FormatStream = require('watson-speech/speech-to-text/format-stream.js');
22+
var assign = require('object.assign/polyfill')();
23+
var WritableElementStream = require('watson-speech/speech-to-text/writable-element-stream');
24+
25+
/**
26+
* Recognize audio from a <audio> or <video> element
27+
*
28+
* @deprecated - This method has several quality issues, and so is no longer supported.
29+
*
30+
* @param {Object} options - Also passed to {MediaElementAudioStream} and to {RecognizeStream}
31+
* @param {String} options.token - Auth Token - see https://github.com/watson-developer-cloud/node-sdk#authorization
32+
* @param {MediaElement} options.element - the <video> or <audio> element to play
33+
* @param {Boolena} [options.format=true] - pipe the text through a {FormatStream} which performs light formatting
34+
* @param {String|DOMElement} [options.outputElement] pipe the text to a WriteableElementStream targeting the specified element. Also defaults objectMode to true to enable interim results.
35+
*
36+
* @returns {RecognizeStream|FormatStream}
37+
*/
38+
module.exports = function recognizeElement(options) {
39+
if (!options || !options.token) {
40+
throw new Error("WatsonSpeechToText: missing required parameter: opts.token");
41+
}
42+
43+
// the WritableElementStream works best in objectMode
44+
if (options.outputElement && options.objectMode !== false) {
45+
options.objectMode = true;
46+
}
47+
48+
49+
// we don't want the readable stream to have objectMode on the input even if we're setting it for the output
50+
var rsOpts = assign({}, options);
51+
rsOpts.readableObjectMode = options.objectMode;
52+
rsOpts['content-type'] = 'audio/l16;rate=16000';
53+
delete rsOpts.objectMode;
54+
55+
var recognizeStream = new RecognizeStream(rsOpts);
56+
57+
var sourceStream = new MediaElementAudioStream(options.element , {
58+
objectMode: true,
59+
bufferSize: options.bufferSize,
60+
muteSource: options.muteSource,
61+
autoPlay: options.autoPlay !== false // default to true if it's undefined
62+
});
63+
64+
var stream = sourceStream
65+
.pipe(new L16({writableObjectMode: true}))
66+
.pipe(recognizeStream);
67+
68+
if (options.format !== false) {
69+
stream = stream.pipe(new FormatStream(options));
70+
stream.stop = recognizeStream.stop.bind(recognizeStream);
71+
}
72+
73+
recognizeStream.on('stop', sourceStream.stop.bind(sourceStream));
74+
75+
if (options.outputElement) {
76+
stream.pipe(new WritableElementStream(options))
77+
}
78+
79+
return stream;
80+
};

examples/static/browserify-app.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Browserify bundling example

// global window.fetch polyfill for IE/Edge & older Chrome/Firefox
require('whatwg-fetch');

// keep the bundle slim by only requiring the necessary modules
var recognizeMicrophone = require('watson-speech/speech-to-text/recognize-microphone');

// fetch an auth token from the example server
function fetchToken() {
  return fetch('/api/speech-to-text/token').then(function(response) {
    return response.text();
  });
}

// start streaming from the microphone and render results into #output
function startTranscription(token) {
  var stream = recognizeMicrophone({
    token: token,
    continuous: false, // false = automatically stop transcription the first time a pause is detected
    outputElement: '#output' // CSS selector or DOM Element
  });

  stream.on('error', function(err) {
    console.log(err);
  });
}

document.querySelector('#button').onclick = function() {
  fetchToken()
    .then(startTranscription)
    .catch(function(error) {
      console.log(error);
    });
};

0 commit comments

Comments
 (0)