Skip to content

Commit a65e28f

Browse files
committed
Implement WebSocket Interface for STT
1 parent 60dcfa7 commit a65e28f

File tree

4 files changed

+39
-35
lines changed

4 files changed

+39
-35
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,13 @@ Node-RED Watson Nodes for IBM Cloud
77

88
<a href="https://cla-assistant.io/watson-developer-cloud/node-red-node-watson"><img src="https://cla-assistant.io/readme/badge/watson-developer-cloud/node-red-node-watson" alt="CLA assistant" /></a>
99

10+
### New in version 0.6.3
11+
- Allow input / output from Node-RED web-sockets for Speech to Text node. To
12+
enable, select streaming mode. No token is needed as the Node handles this. Look
13+
out for sample flows and templates showing how to use this feature.
14+
1015
### New in version 0.6.2
11-
- Visual Recognition fix for accept-language
16+
- Visual Recognition fix for accept-language
1217

1318
### New in version 0.6.1
1419
- Allow STT Language to be dynamically configurable using msg.srclang

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "node-red-node-watson",
3-
"version": "0.6.2",
3+
"version": "0.6.3",
44
"description": "A collection of Node-RED nodes for IBM Watson services",
55
"dependencies": {
66
"async": "^1.5.2",

services/speech_to_text/v1.html

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@
8585
<div class="form-row">
8686
<label>&nbsp;</label>
8787
<input type="checkbox" id="node-input-streaming-mode" style="display: inline-block; width: auto; vertical-align: top;">
88-
<label for="node-input-streaming-mode" style="width: 70%;"> Use In Streaming Mode</label>
88+
<label for="node-input-streaming-mode" style="width: 70%;"> Streaming Mode</label>
8989
</div>
9090

9191
<div class="form-row">
@@ -141,6 +141,13 @@
141141
<p>The audio transcription will be returned on <code>msg.transcription</code>.</p>
142142
<p>The full response, including alternative transcriptions can be found on
143143
<code>msg.fullresult</code>.</p>
144+
145+
<p>When streaming mode is selected the node makes use of a websocket connection
146+
to communicate with the Speech to Text service. Input to the node will be from
147+
a Node-RED websocket node. Output will be sent to a Node-RED websocket node. WebSocket input
148+
is as per the WebSocket input for Speech To Text, with an action of either
149+
<code>start</code> or <code>stop</code> or an audio blob. No token is needed
150+
as the node takes care of that step.</p>
144151
<p>For more information about the Speech To Text service, read the <a href="https://www.ibm.com/watson/services/speech-to-text/">documentation</a>.</p>
145152
</script>
146153

services/speech_to_text/v1.js

Lines changed: 24 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ module.exports = function (RED) {
145145
return Promise.resolve();
146146
}
147147

148+
// Allow the language to be overridden through msg.srclang, no check
149+
// for validity
148150
function overrideCheck(msg) {
149151
if (msg.srclang){
150152
var langCode = payloadutils.langTransToSTTFormat(msg.srclang);
@@ -153,6 +155,8 @@ module.exports = function (RED) {
153155
return Promise.resolve();
154156
}
155157

158+
159+
// Input is a standard msg.payload
156160
function payloadNonStreamCheck(msg) {
157161
var message = '';
158162

@@ -240,6 +244,9 @@ module.exports = function (RED) {
240244
return p;
241245
}
242246

247+
// The input is from a websocket stream in Node-RED.
248+
// expect action of 'start' or 'stop' or a data blob
249+
// if it's a blob then it's going to be audio.
243250
function processInputStream(msg) {
244251
var tmp = msg.payload;
245252

@@ -349,21 +356,24 @@ module.exports = function (RED) {
349356
return p;
350357
}
351358

359+
// If we are going to connect to STT through websockets then it's going to
360+
// disconnect or time out, so we need to handle that occurrence.
352361
function connectIfNeeded() {
353362
console.log('re-establishing the connect');
354363
websocket = null;
355364
socketCreationInProcess = false;
356-
processSTTSocketStart()
365+
processSTTSocketStart(false)
357366
.then(() => {
358-
return Promise.resolve();
359-
//return;
367+
//return Promise.resolve();
368+
return;
360369
})
361370
.catch((err) => {
362-
return Promise.resolve();
371+
//return Promise.resolve();
372+
return;
363373
});
364374
}
365375

366-
function processSTTSocketStart() {
376+
function processSTTSocketStart(initialConnnect) {
367377
var p = new Promise(function resolver(resolve, reject) {
368378
var model = config.lang + '_' + config.band;
369379
var wsURI = '';
@@ -372,7 +382,6 @@ module.exports = function (RED) {
372382
var tmp = endpoint.replace('https', 'wss');
373383
wsURI = tmp + '/v1/recognize'
374384
+ '?watson-token=' + token + '&model=' + model;
375-
// https://stream.watsonplatform.net/speech-to-text/api
376385
} else {
377386
wsURI = 'wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize'
378387
+ '?watson-token=' + token + '&model=' + model;
@@ -382,10 +391,6 @@ module.exports = function (RED) {
382391
socketCreationInProcess = true;
383392
var ws = new WebSocket(wsURI);
384393
ws.on('open', () => {
385-
console.log('******************');
386-
console.log('Web Socket is now open');
387-
388-
console.log('Signalling Start');
389394
ws.send(JSON.stringify(startPacket));
390395
websocket = ws;
391396
socketCreationInProcess = false;
@@ -402,7 +407,6 @@ module.exports = function (RED) {
402407
node.send(newMsg);
403408
if (d && d.state && 'listening' === d.state){
404409
socketListening = true;
405-
console.log('We are now listening');
406410
resolve();
407411
}
408412
});
@@ -411,7 +415,7 @@ module.exports = function (RED) {
411415
//if (websocket) {
412416
// websocket.close();
413417
//}
414-
//websocket = null;
418+
websocket = null;
415419
socketListening = false;
416420
console.log('STT Socket disconnected');
417421
setTimeout(connectIfNeeded, 1000);
@@ -420,7 +424,9 @@ module.exports = function (RED) {
420424
ws.on('error', (err) => {
421425
socketListening = false;
422426
console.log('Error Detected');
423-
reject(err);
427+
if (initialConnect) {
428+
reject(err);
429+
}
424430
});
425431

426432
} else {
@@ -432,8 +438,9 @@ module.exports = function (RED) {
432438
}
433439

434440

441+
// While we are waiting for a connection, stack the data input
442+
// so it can be processed, when the connection becomes available.
435443
function stackAudioFile(audioData) {
436-
console.log('Pushing onto the stack');
437444
audioStack.push(audioData);
438445
return Promise.resolve();
439446
}
@@ -442,8 +449,6 @@ module.exports = function (RED) {
442449
if (audioStack && audioStack.length) {
443450
audioStack.forEach((a) => {
444451
if (a && a.action && 'data' === a.action) {
445-
console.log('sending data from stack');
446-
console.log(a.action);
447452
websocket.send(a.data);
448453
}
449454
});
@@ -453,36 +458,22 @@ module.exports = function (RED) {
453458

454459
function sendAudioSTTSocket(audioData) {
455460
var p = new Promise(function resolver(resolve, reject) {
456-
//console.log('Sending Audio - outer ');
457-
//console.log(audioData);
458461
// send stack First
459462
sendTheStack();
460463
if (audioData && audioData.action) {
461-
console.log('action type is ', audioData.action);
462464
if ('data' === audioData.action) {
463-
//console.log('Sending Audio - inner');
464-
//console.log(audioData.data);
465-
console.log('sending data from input');
466465
websocket.send(audioData.data, (error) => {
467466
if (error) {
468-
console.log(error);
469467
reject(error);
470468
} else {
471469
resolve();
472470
}
473471
});
474472
} else {
475-
//var message = { action: 'stop' };
476473
if (audioData.action === 'stop') {
477-
console.log('Signalling Stop');
478474
websocket.send(JSON.stringify(audioData));
479475
socketListening = false;
480-
481-
// Closing as refresh doesn't appear to work
482-
//websocket.close();
483-
//websocket = null;
484476
}
485-
//websocket.send(JSON.stringify(message));
486477
}
487478
}
488479
});
@@ -497,19 +488,20 @@ module.exports = function (RED) {
497488
switch (audioData.action) {
498489
case 'start':
499490
//return Promise.reject('Its a start');
500-
return processSTTSocketStart();
491+
return processSTTSocketStart(true);
501492
case 'stop':
502493
delay = 2000;
503494
case 'data':
504495
// Add a Delay to allow the listening thread to kick in
496+
// The delay for stop is longer, so that it doesn't get actioned
497+
// before the audio buffers.
505498
setTimeout(() => {
506499
if (socketListening) {
507500
return sendAudioSTTSocket(audioData);
508501
} else {
509502
return stackAudioFile(audioData);
510503
}
511504
}, delay);
512-
//return Promise.reject('Its a data or stop');
513505
default:
514506
return Promise.resolve();
515507
}

0 commit comments

Comments
 (0)