Skip to content

Commit ea04023

Browse files
committed
update to latest microphone-stream to enable iOS compatibility
1 parent 85ddb5c commit ea04023

10 files changed

+233
-157
lines changed

dist/watson-speech.js

Lines changed: 86 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -5977,13 +5977,19 @@ var bufferFrom = __webpack_require__(15);
59775977
*
59785978
* @see https://developer.mozilla.org/en-US/docs/Web/API/Navigator/getUserMedia
59795979
*
5980-
* @param {MediaStream} stream https://developer.mozilla.org/en-US/docs/Web/API/MediaStream
59815980
* @param {Object} [opts] options
5981+
* @param {MediaStream} [opts.stream] https://developer.mozilla.org/en-US/docs/Web/API/MediaStream - for iOS compatibility, it is recommended that you create the MicrophoneStream instance in response to the tap - before you have a MediaStream, and then later call setStream() with the MediaStream.
59825982
* @param {Boolean} [opts.objectMode=false] Puts the stream into ObjectMode where it emits AudioBuffers instead of Buffers - see https://developer.mozilla.org/en-US/docs/Web/API/AudioBuffer
59835983
* @param {Number|null} [opts.bufferSize=null] https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
59845984
* @constructor
59855985
*/
5986-
function MicrophoneStream(stream, opts) {
5986+
function MicrophoneStream(opts) {
5987+
// backwards compatibility - passing in the Stream here will generally not work on iOS 11 Safari
5988+
if (typeof MediaStream && opts instanceof MediaStream) {
5989+
var stream = opts;
5990+
opts = arguments[1] || {};
5991+
opts.stream = stream;
5992+
}
59875993
// "It is recommended for authors to not specify this buffer size and allow the implementation to pick a good
59885994
// buffer size to balance between latency and audio quality."
59895995
// https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
@@ -6019,12 +6025,32 @@ function MicrophoneStream(stream, opts) {
60196025

60206026
var AudioContext = window.AudioContext || window.webkitAudioContext;
60216027
var context = new AudioContext();
6022-
var audioInput = context.createMediaStreamSource(stream);
60236028
var recorder = context.createScriptProcessor(bufferSize, inputChannels, outputChannels);
60246029

6025-
recorder.onaudioprocess = recorderProcess;
6030+
// Workaround for Safari on iOS 11 - context starts out suspended, and the resume() call must be in response to a tap.
6031+
// This allows you to create the MicrophoneStream instance synchronously in response to the first tap,
6032+
// and then connect the MediaStream asynchronously, after the user has granted microphone access.
6033+
var audioInput;
6034+
if (context.state === 'suspended') {
6035+
context.resume();
6036+
}
60266037

6027-
audioInput.connect(recorder);
6038+
/**
6039+
* Set the MediaStream
6040+
*
6041+
* This was removed from the constructor to enable better compatibility with Safari on iOS 11.
6042+
*
6043+
* @param {MediaStream} stream https://developer.mozilla.org/en-US/docs/Web/API/MediaStream
6044+
*/
6045+
this.setStream = function(stream) {
6046+
audioInput = context.createMediaStreamSource(stream);
6047+
audioInput.connect(recorder);
6048+
recorder.onaudioprocess = recorderProcess;
6049+
};
6050+
6051+
if (opts.stream) {
6052+
this.setStream(stream);
6053+
}
60286054

60296055
// other half of workaround for chrome bugs
60306056
recorder.connect(context.destination);
@@ -6039,7 +6065,9 @@ function MicrophoneStream(stream, opts) {
60396065
// This fails in some older versions of chrome. Nothing we can do about it.
60406066
}
60416067
recorder.disconnect();
6042-
audioInput.disconnect();
6068+
if (audioInput) {
6069+
audioInput.disconnect();
6070+
}
60436071
try {
60446072
context.close(); // returns a promise;
60456073
} catch (ex) {
@@ -8785,29 +8813,66 @@ module.exports = function recognizeMicrophone(options) {
87858813
var recognizeStream = new RecognizeStream(rsOpts);
87868814
var streams = [recognizeStream]; // collect all of the streams so that we can bundle up errors and send them to the last one
87878815

8816+
// set up the output first so that we have a place to emit errors
8817+
// if there's trouble with the input stream
8818+
var stream = recognizeStream;
8819+
87888820
var keepMic = options.keepMicrophone;
8789-
var getMicStream;
8821+
var micStream;
87908822
if (keepMic && preservedMicStream) {
87918823
preservedMicStream.unpipe(bitBucket);
8792-
getMicStream = Promise.resolve(preservedMicStream);
8824+
micStream = preservedMicStream;
87938825
} else {
8826+
// create the MicrophoneStream synchronously to allow it to resume the context in Safari on iOS 11
8827+
micStream = new MicrophoneStream({
8828+
objectMode: true,
8829+
bufferSize: options.bufferSize
8830+
});
87948831
var pm = options.mediaStream ? Promise.resolve(options.mediaStream) : getUserMedia({ video: false, audio: true });
8795-
8796-
getMicStream = pm.then(function(mic) {
8797-
var micStream = new MicrophoneStream(mic, {
8798-
objectMode: true,
8799-
bufferSize: options.bufferSize
8832+
pm
8833+
.then(function(mediaStream) {
8834+
micStream.setStream(mediaStream);
8835+
if (keepMic) {
8836+
preservedMicStream = micStream;
8837+
}
8838+
})
8839+
.catch(function(err) {
8840+
stream.emit('error', err);
8841+
if (err.name === 'NotSupportedError') {
8842+
stream.end(); // end the stream
8843+
}
88008844
});
8801-
if (keepMic) {
8802-
preservedMicStream = micStream;
8803-
}
8804-
return Promise.resolve(micStream);
8805-
});
88068845
}
88078846

8808-
// set up the output first so that we have a place to emit errors
8809-
// if there's trouble with the input stream
8810-
var stream = recognizeStream;
8847+
var l16Stream = new L16({ writableObjectMode: true });
8848+
8849+
micStream.pipe(l16Stream).pipe(recognizeStream);
8850+
8851+
streams.push(micStream, l16Stream);
8852+
8853+
/**
8854+
* unpipes the mic stream to prevent any more audio from being sent over the wire
8855+
* temporarily re-pipes it to the bitBucket (basically /dev/null) because
8856+
* otherwise it will buffer the audio from in between calls and prepend it to the next one
8857+
*
8858+
* @private
8859+
*/
8860+
function end() {
8861+
micStream.unpipe(l16Stream);
8862+
micStream.pipe(bitBucket);
8863+
l16Stream.end();
8864+
}
8865+
// trigger on both stop and end events:
8866+
// stop will not fire when a stream ends due to a timeout
8867+
// but when stop does fire, we want to honor it immediately
8868+
// end will always fire, but it may take a few moments after stop
8869+
if (keepMic) {
8870+
recognizeStream.on('end', end);
8871+
recognizeStream.on('stop', end);
8872+
} else {
8873+
recognizeStream.on('end', micStream.stop.bind(micStream));
8874+
recognizeStream.on('stop', micStream.stop.bind(micStream));
8875+
}
88118876

88128877
if (options.resultsBySpeaker) {
88138878
stream = stream.pipe(new SpeakerStream(options));
@@ -8829,49 +8894,6 @@ module.exports = function recognizeMicrophone(options) {
88298894
streams.push(stream);
88308895
}
88318896

8832-
getMicStream.catch(function(err) {
8833-
stream.emit('error', err);
8834-
if (err.name === 'NotSupportedError') {
8835-
stream.end(); // end the stream
8836-
}
8837-
});
8838-
8839-
getMicStream
8840-
.then(function(micStream) {
8841-
streams.push(micStream);
8842-
8843-
var l16Stream = new L16({ writableObjectMode: true });
8844-
8845-
micStream.pipe(l16Stream).pipe(recognizeStream);
8846-
8847-
streams.push(l16Stream);
8848-
8849-
/**
8850-
* unpipes the mic stream to prevent any more audio from being sent over the wire
8851-
* temporarily re-pipes it to the bitBucket (basically /dev/null) becuse
8852-
* otherwise it will buffer the audio from in between calls and prepend it to the next one
8853-
*
8854-
* @private
8855-
*/
8856-
function end() {
8857-
micStream.unpipe(l16Stream);
8858-
micStream.pipe(bitBucket);
8859-
l16Stream.end();
8860-
}
8861-
// trigger on both stop and end events:
8862-
// stop will not fire when a stream ends due to a timeout
8863-
// but when stop does fire, we want to honor it immediately
8864-
// end will always fire, but it may take a few moments after stop
8865-
if (keepMic) {
8866-
recognizeStream.on('end', end);
8867-
recognizeStream.on('stop', end);
8868-
} else {
8869-
recognizeStream.on('end', micStream.stop.bind(micStream));
8870-
recognizeStream.on('stop', micStream.stop.bind(micStream));
8871-
}
8872-
})
8873-
.catch(recognizeStream.emit.bind(recognizeStream, 'error'));
8874-
88758897
// Capture errors from any stream except the last one and emit them on the last one
88768898
streams.forEach(function(prevStream) {
88778899
if (prevStream !== stream) {

dist/watson-speech.min.js

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/watson-speech.min.js.map

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/static/index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ <h2>Speech to Text</h2>
1717
<h3>Microphone Input</h3>
1818
<ul>
1919
<li><a href="microphone-streaming.html">Transcribe from Microphone</a></li>
20+
<li><a href="microphone-streaming-preload-token.html">Transcribe from Microphone - w/ pre-loaded token</a></li>
2021
<li><a href="microphone-streaming-auto-stop.html">Transcribe from Microphone, automatically stop at first pause</a></li>
2122
<li><a href="microphone-streaming-model.html">Transcribe from Microphone, with chosen model</a></li>
2223
<li><a href="microphone-alternatives.html">Transcribe from Microphone, with Alternatives</a></li>
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>Pre-loaded Token - Watson Speech to Text client example</title>
6+
<link rel="stylesheet" href="style.css" />
7+
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8+
</head>
9+
<body>
10+
<div class="container">
11+
<a href="/">&laquo; Examples</a>
12+
13+
<section>
14+
<h2>Transcribe from Microphone - With Pre-loaded Token</h2>
15+
<button id="button">Start Microphone Transcription</button>
16+
<button id="stop">Stop</button>
17+
18+
<h2>Output:</h2>
19+
<div id="output">--</div>
20+
</section>
21+
22+
<script src="bower_components/watson-speech/dist/watson-speech.js"></script>
23+
<!-- window.fetch polyfill for IE/Edge & Older Chrome/FireFox -->
24+
<script src="bower_components/fetch/fetch.js"></script>
25+
26+
<h2>Code for this demo:</h2>
27+
28+
<pre><code><script style="display: block;">
29+
var token;
30+
fetch('/api/speech-to-text/token')
31+
.then(function(response) {
32+
return response.text();
33+
}).then(function(_token) {
34+
token = _token;
35+
}).catch(function(error) {
36+
console.log(error);
37+
});
38+
39+
40+
document.querySelector('#button').onclick = function () {
41+
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
42+
token: token,
43+
outputElement: '#output' // CSS selector or DOM Element
44+
});
45+
46+
stream.on('error', function(err) {
47+
console.log(err);
48+
});
49+
50+
document.querySelector('#stop').onclick = function() {
51+
stream.stop();
52+
};
53+
};
54+
55+
</script></code></pre>
56+
57+
</div>
58+
</body>
59+
</html>

examples/static/microphone-streaming-text-to-console.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ <h2>Code for this demo:</h2>
3535

3636
var stream = WatsonSpeech.SpeechToText.recognizeMicrophone({
3737
token: token,
38-
object_mode: false // default
38+
object_mode: false
3939
});
4040

4141
stream.setEncoding('utf8'); // get text instead of Buffers for on data events

examples/static/text-to-speech-preload.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<html lang="en">
33
<head>
44
<meta charset="UTF-8">
5-
<title>Basic Example - Watson Text to Speech</title>
5+
<title>Pre-loaded Example - Watson Text to Speech</title>
66
<link rel="stylesheet" href="style.css" />
77
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
88
</head>
@@ -11,7 +11,7 @@
1111
<a href="/">&laquo; Examples</a>
1212

1313
<section>
14-
<h2>Basic Text to Speech Example</h2>
14+
<h2>Pre-loaded Example</h2>
1515
<p><button id="button">Play Synthesized Text</button></p>
1616
</section>
1717

0 commit comments

Comments
 (0)