Skip to content

Commit a78e3cd

Browse files
committed
optimized and fixed STT module
1 parent 5fe74a8 commit a78e3cd

File tree

2 files changed

+63
-21
lines changed

2 files changed

+63
-21
lines changed

src/modules/shared/sepia-stt-socket-client.js

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ class SepiaSttSocketClient {
203203

204204
}else if (msgJson.type == "result"){
205205
this._onResult(msgJson);
206-
if (msgJson.isFinal && !this.activeAsrModel.continuous && this.autoCloseOnLastFinal){
206+
if (msgJson.isFinal && !this.activeOptions.continuous && this.autoCloseOnLastFinal){
207207
//after final result, close connection
208208
this.closeConnection();
209209
}
@@ -259,10 +259,13 @@ class SepiaSttSocketClient {
259259
"msg_id": this.newMessageId()
260260
});
261261
}
262-
sendAudioEnd(byteLength){
262+
sendAudioEnd(byteLength, bufferOrTimeLimit){
263263
return this.sendJson({
264264
"type": "audioend",
265-
"data": {"byteLength": byteLength},
265+
"data": {
266+
"byteLength": byteLength,
267+
"bufferOrTimeLimit": bufferOrTimeLimit
268+
},
266269
"msg_id": this.newMessageId()
267270
});
268271
}

src/modules/stt-socket-worker.js

Lines changed: 57 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ let _lookbackBufferSize;
6767
let _lookbackRingBuffer;
6868
let recordedBuffers;
6969
let _sentBuffers;
70+
let _shiftedBuffers;
7071
let recordBufferMaxN;
72+
let continuous; //NOTE: this is an engine option but influences full buffer as well
7173

7274
let gateIsOpen = false;
7375
let _gateOpenTS = 0;
@@ -91,6 +93,7 @@ function init(){
9193
}
9294
recordedBuffers = [];
9395
_sentBuffers = 0;
96+
_shiftedBuffers = 0;
9497
_isFirstValidProcess = true;
9598
gateIsOpen = false;
9699
_gateOpenTS = 0;
@@ -109,8 +112,10 @@ function gateControl(open, gateOptions){
109112
//we always reset the buffer
110113
recordedBuffers = [];
111114
_sentBuffers = 0;
115+
_shiftedBuffers = 0;
112116
_gateOpenTS = Date.now();
113117
gateIsOpen = true;
118+
msg.gate.isOpen = true;
114119
msg.gate.openedAt = _gateOpenTS;
115120
//open connection
116121
if (sttServer){
@@ -119,9 +124,14 @@ function gateControl(open, gateOptions){
119124
}else{
120125
_gateCloseTS = Date.now();
121126
gateIsOpen = false;
127+
msg.gate.isOpen = false;
122128
msg.gate.openedAt = _gateOpenTS;
123129
msg.gate.closedAt = _gateCloseTS;
124-
130+
var closedDueToBufferLimit = (!continuous && recordedBuffers && recordBufferMaxN
131+
&& recordedBuffers.length && recordedBuffers.length >= recordBufferMaxN);
132+
if (closedDueToBufferLimit){
133+
msg.gate.bufferOrTimeLimit = true;
134+
}
125135
//---------- DRY-RUN TEST: fake final result ----------
126136
if (enableDryRun && recordedBuffers.length && recordedBuffers.length > recordBufferMaxN/3){
127137
setTimeout(function(){
@@ -138,7 +148,7 @@ function gateControl(open, gateOptions){
138148
recordedBuffers.forEach(function(ta){
139149
byteLength += ta.byteLength;
140150
});
141-
sttServer.sendAudioEnd(byteLength); //close input and request final result
151+
sttServer.sendAudioEnd(byteLength, closedDueToBufferLimit); //close input and request final result
142152
}
143153

144154
//send WAV?
@@ -148,7 +158,6 @@ function gateControl(open, gateOptions){
148158
}, 100);
149159
}
150160
}
151-
msg.gate.isOpen = gateIsOpen;
152161
postMessage(msg);
153162
}
154163

@@ -200,8 +209,11 @@ function constructWorker(options){
200209
recordBufferMaxN = Math.ceil(recordBufferMaxN);
201210
if (recordBufferMaxN < 0) recordBufferMaxN = 0;
202211

212+
//end on first final result? - NOTE: this works a bit differently from WebSpeech "continuous"
213+
continuous = (options.setup.continuous != undefined)? options.setup.continuous : false;
214+
203215
//server
204-
socketUrl = options.setup.socketUrl || "http://localhost:20741";
216+
socketUrl = options.setup.socketUrl || options.setup.serverUrl || "http://localhost:20741";
205217
clientId = options.setup.clientId || "any";
206218
accessToken = options.setup.accessToken || "test1234";
207219
messageFormat = options.setup.messageFormat || "default";
@@ -210,36 +222,37 @@ function constructWorker(options){
210222
returnAudioFile = true;
211223
}else{
212224
var asrEngineOptions = options.setup.engineOptions || {}; //interimResults (unused?), alternatives, etc.
213-
//end on first final result? - NOTE: this works a bit differently from WebSpeech "continuous"
214-
var continuous = (options.setup.continuous != undefined)? options.setup.continuous : false;
215225
var optimizeFinalResult = (options.setup.optimizeFinalResult != undefined)? options.setup.optimizeFinalResult : true;
216-
var engineOptions = {
226+
var engineOptions = Object.assign({}, asrEngineOptions, {
217227
//common options
218228
samplerate: inputSampleRate,
219229
continuous: continuous,
220230
language: (options.setup.language || ""),
231+
model: (asrEngineOptions.model || ""), //e.g.: "vosk-model-small-de"
221232
optimizeFinalResult: optimizeFinalResult,
222233
//specials (e.g. for Vosk):
223-
model: (asrEngineOptions.model || ""), //e.g.: "vosk-model-small-de"
224234
/*
225235
alternatives: (asrEngineOptions.alternatives || 1),
226236
phrases: [],
227237
speaker: false,
228238
words: false
229239
*/
230240
doDebug: doDebug
231-
};
241+
});
232242
//console.error("engineOptions", engineOptions); //DEBUG
233243
var serverOptions = {
234244
onOpen: function(){
235245
if (doDebug) console.error("SttSocketWorker - DEBUG - CONNECTION OPEN");
246+
sendConnectionEvent("open");
236247
},
237248
onReady: function(activeOptions){
238249
if (doDebug) console.error("SttSocketWorker - DEBUG - CONNECTION READY", activeOptions);
250+
sendConnectionEvent("ready");
239251
startOrContinueStream();
240252
},
241253
onClose: function(){
242254
if (doDebug) console.error("SttSocketWorker - DEBUG - CONNECTION CLOSED");
255+
sendConnectionEvent("closed");
243256
},
244257
onResult: function(res){
245258
if (doDebug) console.error("SttSocketWorker - DEBUG - CONNECTION RESULT", res);
@@ -248,6 +261,10 @@ function constructWorker(options){
248261
}else{
249262
sendDefaultRecognitionResult(res);
250263
}
264+
//if result is final and gate is closed -> close connection
265+
if (res.isFinal && !gateIsOpen && sttServer && sttServer.connectionIsOpen){
266+
sttServer.closeConnection();
267+
}
251268
},
252269
onError: function(err){
253270
if (!err) err = {name: "UnknownError", message: "Unknown error"};
@@ -377,16 +394,18 @@ function startOrContinueStream(){
377394

378395
//buffer has data and some has been sent already
379396
}else if (_sentBuffers && recordedBuffers.length){
380-
if (_sentBuffers == (recordedBuffers.length - 1)){
397+
var normalizedLength = recordedBuffers.length + _shiftedBuffers; //take shifted data into account
398+
if (_sentBuffers == (normalizedLength - 1)){
381399
//send last
382-
var data = recordedBuffers[_sentBuffers];
400+
var data = recordedBuffers[recordedBuffers.length - 1];
383401
_sentBuffers++;
384402
sendBytes(data);
385403

386-
}else if (_sentBuffers < recordedBuffers.length){
404+
}else if (_sentBuffers < normalizedLength){
387405
//send rest at once
388-
var data = new Blob(recordedBuffers.slice(_sentBuffers));
389-
_sentBuffers = recordedBuffers.length;
406+
var restN = normalizedLength - _sentBuffers;
407+
var data = new Blob(recordedBuffers.slice(-1 * restN));
408+
_sentBuffers += restN;
390409
sendBytes(data);
391410

392411
}else{
@@ -408,19 +427,28 @@ function clearBuffer(){
408427
lookbackBufferIsBlocked = false;
409428
recordedBuffers = [];
410429
_sentBuffers = 0;
430+
_shiftedBuffers = 0;
411431
}
412432

413433
//reached max recording length
414434
function maxLengthReached(){
415-
//TODO: implement properly, do more ... ?
416-
gateControl(false);
435+
if (continuous){
436+
//drop old buffer
437+
var shift = (recordedBuffers.length - recordBufferMaxN);
438+
_shiftedBuffers += shift;
439+
recordedBuffers.splice(0, shift);
440+
}else{
441+
//close
442+
gateControl(false);
443+
}
444+
//TODO: do more ... ?
417445
}
418446

419447
//send result message (partial or final)
420448
function sendWebSpeechCompatibleRecognitionResult(isFinal, transcript){
421449
postMessage({
422450
recognitionEvent: {
423-
name: "result",
451+
type: "result",
424452
resultIndex: 0,
425453
results: [{
426454
isFinal: isFinal,
@@ -434,11 +462,20 @@ function sendWebSpeechCompatibleRecognitionResult(isFinal, transcript){
434462
});
435463
}
436464
function sendDefaultRecognitionResult(event){
465+
if (event && !event.type) event.type = "result";
437466
postMessage({
438467
recognitionEvent: event,
439468
eventFormat: "default"
440469
});
441470
}
471+
function sendConnectionEvent(type, data){
472+
postMessage({
473+
connectionEvent: {
474+
type: type,
475+
data: data
476+
}
477+
});
478+
}
442479
//send error message
443480
function sendWebSpeechCompatibleError(errorName, errorMessage){
444481
var eventName = "error"; //possible as well: "nomatch"
@@ -455,7 +492,7 @@ function sendWebSpeechCompatibleError(errorName, errorMessage){
455492
*/
456493
postMessage({
457494
recognitionEvent: {
458-
name: eventName,
495+
type: eventName,
459496
error: errorName,
460497
message: errorMessage,
461498
timeStamp: Date.now()
@@ -464,6 +501,7 @@ function sendWebSpeechCompatibleError(errorName, errorMessage){
464501
});
465502
}
466503
function sendDefaultErrorEvent(error){
504+
if (error && !error.type) error.type = "error";
467505
postMessage({
468506
recognitionEvent: error,
469507
eventFormat: "default"
@@ -518,6 +556,7 @@ function release(options){
518556
_lookbackRingBuffer = null;
519557
recordedBuffers = null;
520558
_sentBuffers = undefined;
559+
_shiftedBuffers = undefined;
521560
gateIsOpen = false;
522561
_gateOpenTS = 0;
523562
_gateCloseTS = 0;

0 commit comments

Comments
 (0)